---
title: "Leapfrogging or Lagging? Drivers of Social Equity from Renewable Energy Transitions Globally"
subtitle: "Replication Code"
author: "Timothy Fraser, Andrew Chapman, Yosuke Shigetomi"
date: "March 13, 2021"
output: html_notebook
---

This study replicates our analysis of drivers on social equity in renewable energy transitions internationally. Please feel free to be in touch with the authors if you have questions.

# 0. Packages

```{r, message = FALSE, warning = FALSE}
# Load packages
library(tidyverse) # for data manipulation
library(readxl) # for importing xlsx and xls files

# For color palletes
library(viridis)
library(wesanderson)
# For model effect visualization
library(ggeffects)
library(Zelig)

# Now let's use the remotes package to install 
# some packages in development from github.
library(remotes)
#install_github('vincentarelbundock/WDI')
library(WDI)
# https://cengel.github.io/gearup2016/worldbank.html
#install_github("nset-ornl/wbstats")
library(wbstats)
# https://github.com/nset-ornl/wbstats
#install_github("leifeld/texreg")
library(texreg)
``` 

# 1.0 Get Data


## 1.1 World Bank World Development Indicators

```{r}
my_indicators <- c(
  pop = "SP.POP.TOTL",
  # Population Density (people per square kilometer)
  pop_density = "EN.POP.DNST",
  # GDP per capita
  gdp_capita ="NY.GDP.PCAP.CD", 
  # Gini Inequality (Income inequality)
  gini = "SI.POV.GINI",
  # Oil Rents as a % of GDP
  oil_rents = "NY.GDP.PETR.RT.ZS",
  #  	Electricity production from coal sources (% of total)	NA	
  coal =  "EG.ELC.COAL.ZS",
  # Electricity production from oil, gas and coal sources (% of total)	NA	
  fossil = "EG.ELC.FOSL.ZS",
  #Electricity production from hydroelectric sources (% of total)	NA	
  hydro = "EG.ELC.HYRO.ZS",
  # Electricity production from natural gas sources (% of total)	NA	
  natural_gas = "EG.ELC.NGAS.ZS",
  #Electricity production from nuclear sources (% of total)	NA	
  nuclear = "EG.ELC.NUCL.ZS",
  # Electricity production from oil sources (% of total)	NA	
  oil = "EG.ELC.PETR.ZS",
  #Electricity production from renewable sources, excluding hydroelectric (% of total)
  #including, solar, wind, biomass, etc.
  renewables = "EG.ELC.RNWX.ZS",
  # Electricity production from combustible renewables and waste,
  # including solid and liquid biomass, municipal waste, etc.
  biomass = "EG.USE.CRNW.ZS",
  # CO2 Emissions (kilotons of CO2)
  emissions_carbon = "EN.ATM.CO2E.KT",
  # Greenhouse Gas Emissions (kilotons of CO2 equivalent)
  # "Total greenhouse gas emissions in kt of CO2 equivalent are composed of CO2 totals excluding short-cycle biomass burning (such as agricultural waste burning and savanna burning) but including other biomass burning (such as forest fires, post-burn decay, peat fires and decay of drained peatlands), all anthropogenic CH4 sources, N2O sources and F-gases (HFCs, PFCs and SF6)."
  emissions_ghg = "EN.ATM.GHGT.KT.CE",
  # CPIA gender equality rating (1=low to 6=high)
  gender_equality = "IQ.CPA.GNDR.XQ",
  # "The percentage of population ages 25 and over that attained or completed upper   secondary education."
  edu_second_pc = "SE.SEC.CUAT.UP.ZS",
  # "The percentage of population ages 25 and over that attained or completed Bachelor's or equivalent."
  edu_uni_pc = "SE.TER.CUAT.BA.ZS",
  # "The percentage of population ages 25 and over that attained or completed short-cycle tertiary education."
  edu_some_college_pc = "SE.TER.CUAT.ST.FE.ZS",
  # Life expectancy in years
  life_exp = "SP.DYN.LE00.IN", 
  # CPIA transparency, accountability, and corruption 
  # in the public sector rating (1=low to 6=high)	
  corruption = "IQ.CPA.TRAN.XQ",
  
  # Total land area (sq. km)
  area = "AG.LND.TOTL.K2", 
  # Rural land area (sq.km)
  area_rural = "AG.LND.TOTL.RU.K2",
  # Urbal land area (sq.km)
  area_urban = "AG.LND.TOTL.UR.K2",
  
  # Unemployment Rate (% of total labor force)
  unemployment = "SL.UEM.TOTL.ZS", # Modeled ILO Estimate
  unemployment_nat = "SL.UEM.TOTL.NE.ZS", # National Estimate
  
  # Measures Coverage of Social Protection and Labor Programs (% of population)
  # "Coverage of social protection and labor programs (SPL) shows the percentage of population participating in social insurance, social safety net, and unemployment benefits and active labor market programs. Estimates include both direct and indirect beneficiaries."
  welfare_coverage = "per_allsp.cov_pop_tot",
  # "Adequacy of social protection and labor programs (% of total welfare of beneficiary households)"
  # "Adequacy of social protection and labor programs (SPL) is measured by the total transfer amount received by the population participating in social insurance, social safety net, and unemployment benefits and active labor market programs as a share of their total welfare. Welfare is defined as the total income or total expenditure of beneficiary households. Estimates include both direct and indirect beneficiaries."
  welfare_adequacy = "per_allsp.adq_pop_tot",
  # Income share held by XXXX 20%
  # Percentage share of income or consumption is the share that accrues to subgroups of population indicated by deciles or quintiles. Percentage shares by quintile may not sum to 100 because of rounding.
  income_0_20 = "SI.DST.FRST.20",
  income_21_40 = "SI.DST.02ND.20",
  income_41_60 = "SI.DST.03RD.20",
  income_61_80 = "SI.DST.04TH.20",
  income_81_100 = "SI.DST.05TH.20",
  
  # Percentage of Population per Age Group
  pop_age_0004_women = "SP.POP.0004.FE.5Y",
  pop_age_0004_men = "SP.POP.0004.MA.5Y",
  pop_age_0509_women = "SP.POP.0509.FE.5Y",
  pop_age_0509_men = "SP.POP.0509.MA.5Y",
  pop_age_1014_women = "SP.POP.1014.FE.5Y",
  pop_age_1014_men = "SP.POP.1014.MA.5Y",
  pop_age_1519_women = "SP.POP.1519.FE.5Y",
  pop_age_1519_men = "SP.POP.1519.MA.5Y",
  pop_age_2024_women = "SP.POP.2024.FE.5Y",
  pop_age_2024_men = "SP.POP.2024.MA.5Y",
  pop_age_2529_women = "SP.POP.2529.FE.5Y",
  pop_age_2529_men = "SP.POP.2529.MA.5Y",
  
  pop_age_3034_women = "SP.POP.3034.FE.5Y",
  pop_age_3034_men = "SP.POP.3034.MA.5Y",
  pop_age_3539_women = "SP.POP.3539.FE.5Y",
  pop_age_3539_men = "SP.POP.3539.MA.5Y",
  pop_age_4044_women = "SP.POP.4044.FE.5Y",
  pop_age_4044_men = "SP.POP.4044.MA.5Y",
  pop_age_4549_women = "SP.POP.4549.FE.5Y",
  pop_age_4549_men = "SP.POP.4549.MA.5Y",
  pop_age_5054_women = "SP.POP.5054.FE.5Y",
  pop_age_5054_men = "SP.POP.5054.MA.5Y",
  pop_age_5559_women = "SP.POP.5559.FE.5Y",
  pop_age_5559_men = "SP.POP.5559.MA.5Y",
  pop_age_6064_women = "SP.POP.6064.FE.5Y",
  pop_age_6064_men = "SP.POP.6064.MA.5Y",
  
  pop_age_6569_women = "SP.POP.6569.FE.5Y",
  pop_age_6569_men = "SP.POP.6569.MA.5Y",
  pop_age_7074_women = "SP.POP.7074.FE.5Y",
  pop_age_7074_men = "SP.POP.7074.MA.5Y",
  pop_age_7579_women = "SP.POP.7579.FE.5Y",
  pop_age_7579_men = "SP.POP.7579.MA.5Y",
  pop_age_80_plus_women = "SP.POP.80UP.FE.5Y",
  pop_age_80_plus_men = "SP.POP.80UP.MA.5Y")

# Download these indicators 
wb_data(my_indicators, start_date = 1990, end_date = 2015) %>%
  saveRDS("raw_data/worldbank_output.rds")


# also download demographic data
wb_data(c( pop_rural = "SP.RUR.TOTL",
           pop_urban = "SP.URB.TOTL"),
        start_date = 1990,
        end_date = 2015) %>%
  select(code = iso3c, year = date, pop_rural, pop_urban) %>%
  saveRDS("raw_data/demographics.rds")
#wb_search(pattern = "urban pop", fields = "indicator")
```


## 1.2 Median Age Bracket

```{r, message = FALSE}
read_rds("raw_data/worldbank_output.rds") %>% 
  # get population and age breakdown per country-year
  select(code= iso3c, country, year = date, pop, contains("_age")) %>%
  # pivot longer
  pivot_longer(cols = -c(code, country, year, pop),
               names_to = "label",
               values_to = "value") %>%
  # Get original age label, separate from gender
  # Turn each age range into a number, which is the halfway interval
  # For example, for 0 to 4, choose 2.5, for 5-9, choose 7.5, etc.
  # This will allow us to create a numeric/ordinal measure of age,
  # where 2.5 stands for 0-4
  mutate(age = str_remove(label, pattern = "[_]women|[_]men") %>% 
           str_remove(pattern = "pop_age_") %>%
           str_sub(1,2) %>%
           as.numeric() + 2.5) %>%
  mutate(value = value/100 * pop) %>%
  # Calculate for each country the population per age category
  group_by(code, country, year, age) %>%
  summarize(value = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  # Now sort so it goes USA 1990 2.5, USA 1990 7.5, etc.
  arrange(code, country, year, age) %>%
  # Identify the median age bracket for each country-year
  group_by(code, country, year) %>%
  mutate(upper = cumsum(value)) %>%
  mutate(median = (max(cumsum(value)) + 1)/2) %>%
  mutate(lower = lag(upper)) %>%
  filter(median >= lower,
         median <= upper) %>%
  select(code, country, year, age) %>%
  saveRDS("raw_data/median_age_bracket.rds")
```

## 1.3 Voter Turnout

```{r, message = FALSE, warning = FALSE}
# Voter Turnout Database 
# from the International Institute for Democracy and Electoral Assistance
vote <- read_excel("raw_data/voter_turnout.xlsx") %>%
  select(country = Country, type = `Election type`, year = Year, 
         voter_turnout = `Voter Turnout`) %>%
  fill(country:type, .direction = "down")

# Now, for the countries reported in that dataset,
# Let's create a full grid of all possible country-years, 
# not just years in which elections occurred
expand_grid(country = unique(vote$country),
            year = 1985:2015) %>%
  left_join(by = c("country", "year"),
            y = vote) %>%
  # In the event that there are returns 
  # for both presidential and parliamentary elections,
  # let's take the average
  group_by(country, year) %>%
  summarize(voter_turnout = mean(voter_turnout, na.rm = TRUE)) %>%
  mutate(voter_turnout = if_else(is.nan(voter_turnout), NA_real_, voter_turnout)) %>%
  # Now, fill in subsequent years with the most recent previous voter turnout
  arrange(country, year) %>%
  group_by(country) %>%
  fill(voter_turnout, .direction = "down") %>%
  # But if a regime did not have a specified voter turnout PRIOR to a certain year,
  # we're going to fill that in with 0
  # because that reflects autocracy
  mutate(voter_turnout = if_else(is.na(voter_turnout), 0, voter_turnout)) %>%
  # Now let's join in the correct country code here
  left_join(by = c("country"), 
              y = wb_countries() %>% 
                select(country, code = iso3c)) %>%
    mutate(code = if_else(
      condition = is.na(code), 
      true = country %>% dplyr::recode(
        "Iran, Islamic Republic of" = "IRN",
        "Bahamas" = "BHS",
        "Cape Verde" = "CPV",
        "Congo, Democratic Republic of" = "COD",
        "Egypt" = "EGY",
        "Gambia" = "GMB",
        "Korea, Republic of" = "KOR",
        "Kyrgyzstan" = "KGZ",
        "Lao People's Dem. Republic" = "LAO",
        "Micronesia, Federated States of" = "FSM",
        "Moldova, Republic of" = "MDA",
        "North Macedonia, Republic of" = "MKD",
        "Republic of The Congo (Brazzaville)" = "COG",
        "Saint Kitts and Nevis" = "KNA",
        "Saint Lucia" = "LCA",
        "Saint Vincent and The Grenadines" = "VCT",
        "Slovakia" = "SVK",
        "Taiwan" = "TWN",
        "Tanzania, United Republic of" = "TZA",
        "Venezuela" = "VEN",
        "Viet Nam" = "VNM",
        "Virgin Islands, British" = "VGB",
        "Yemen" = "YEM"),
      false = code)) %>%
  ungroup() %>%
  # Save to file!
  saveRDS("raw_data/voter_turnout.rds")

remove(vote)

```

## 1.4 World Governance Indicators

```{r, message= FALSE, warning = FALSE}
# Government Effectiveness Index from
# World Governance Indicators (1996-present)
# https://info.worldbank.org/governance/wgi/

get_data = function(sheet){
# Extract original dataset
dat <- read_excel("raw_data/wgidataset.xlsx", 
                  sheet = sheet, skip = 13) 
# Now edit dataset
dat <- dat %>%   
  # Set as new column names...
  magrittr::set_colnames(value = paste(
    # Grab the year
    dat %>%
      names() %>%
      str_extract(pattern = "[0-9]{4}"),
    # Grab the type of the value in that column, eg. estimate, std. error
    dat %>%
      slice(1) %>%
      unlist() %>%
      unname(),
    sep = "-")) %>%
  # fix the first two names
  rename(country = 1, code = 2) %>%
  # and get rid of row 1, which has subtitles
  slice(-1) %>%
  # transform all values into numbers
  mutate_at(vars(-c(country, code)),
            funs(if_else(. == "#N/A", NA_real_, as.numeric(.)))
  ) %>%
  # now pivot longer into a tidy format
  pivot_longer(
    cols = -c(country, code),
    names_to = "measure",
    values_to = "value") %>%
  # separate out into year and measure
  separate(col = measure, into = c("year", "measure"), sep = "-", remove = TRUE) %>%
  mutate(year = as.numeric(year)) %>%
  # Only keep values that are not missing
  filter(!is.na(value)) %>%
  # Label data.frame
  mutate(type = tolower(sheet)) %>%
  return()
}

# For each sheet
c("VoiceandAccountability", "Political StabilityNoViolence", 
         "GovernmentEffectiveness", "RegulatoryQuality", 
         "RuleofLaw", "ControlofCorruption") %>%
  # run the extraction function,
  lapply(get_data) %>%
  # bind the dataset together
  bind_rows() %>%
  # give clearer, R-friendly titles
  mutate(type = type %>% dplyr::recode(
    "voiceandaccountability" = "voice_accountability",
    "political stabilitynoviolence" = "political_stability",
    "governmenteffectiveness" = "government_effectiveness",
    "regulatoryquality" = "regulatory_quality",
    "ruleoflaw" = "rule_of_law",
    "controlofcorruption" = "control_corruption")) %>%
  # and save 
  saveRDS("raw_data/wgi.rds")

read_rds("raw_data/wgi.rds") %>%
  filter(str_detect(country, "Argentina"))
```
```{r}
read_rds("data/dataset_filled.rds") %>%
  select(code, country, year,
         regulatory_quality, rule_of_law, voice_accountability,
         political_stability, government_effectiveness, 
         control_corruption
         #emissions_ghg
         ) %>%
  filter(is.na(regulatory_quality))
```


## 1.5 Polity 5 Scores

```{r}
# Let's also import POLITY 5 scores, from the Center for Systemic Peace.
# POLITY measures the strength of democracy on a scale from +10 (full democracy) to -10 (full autocracy).
# Source: https://www.systemicpeace.org/inscrdata.html
read_excel("raw_data/p5v2018.xls") %>%
  select(code = scode, country, year, polity2) %>%
  mutate(polity2 = if_else(polity2 %in% c(-10:10), polity2, NA_real_)) %>%
  # Filter to just our years of study
  filter(year %in% c(1990:2015)) %>%
  select(-code) %>%
  # Join in current ISO3C codes
  left_join(by = "country",
            y = wb_countries() %>%
              select(country, code = iso3c)) %>%
  # Fix mistaken codes
  mutate(code = if_else(
    condition = is.na(code),
    true = country %>% dplyr::recode(
      "Algeria" = "DZA",
      "Bosnia" = "BIH",
      "Cape Verde" = "CPV",
      "Congo-Brazzaville" = "COG",
      "Congo Brazzaville" = "COG",
      "Czechoslovakia" = "CZE",
      "Egypt" = "EGY",
      "Timor Leste" = "TLS",
      "Gambia" = "GMB",
      "Germany West" = "DEU",
      "Iran" = "IRN",
      "Cote D'Ivoire" = "CIV",
      "Ivory Coast" = "CIV",
      "Kyrgyzstan" = "KGZ",
      "Laos" = "LAO",
      "Macedonia" = "MKD",
      "Myanmar (Burma)" = "MMR",
      "Korea North" = "PRK",
      "Korea South" = "KOR",
      "Russia" = "RUS",
      "Sudan-North" = "SDN",
      "Syria" = "SYR",
      "Taiwan" = "TWN",
      "UAE" = "ARE",
      "Venezuela" = "VEN",
      "Yemen" = "YEM",
      "Serbia and Montenegro" = "SRB",
      "Congo Kinshasa" = "COD"),
    false = code)) %>%
  # In the even that a country has two scores, average them
  group_by(code, year) %>%
  summarize(polity2 = mean(polity2, na.rm = TRUE)) %>%
  # save
  distinct() %>%
  saveRDS("raw_data/polity.rds")

```

## 1.6 War and Violence

```{r}
# Major Episodes of Political Violence
# Let's also import a binary indicator for each country-year telling us when there was an ongoing conflict in a country
# Also compiled by Center for Systemic Peace

# Each county-year reports the total number of major episodes of political violence, which includes societal violence, such as civil violence, civil ware, ethnic violence, or ethnic warfare, as well as interstate violence, such as international violence or international warfare.
# Each of those conflicts is ranked from 1 (lowest) to 10 (highest), where 0 denotes no violence. 
# Then, all acts of violence (and their scores) are summed together to create a total score per country-year describing the magnitude of political violence. This is ACTOTAL.

read_excel("raw_data/MEPVv2018.xls") %>%
  select(code = scode, country, year, violence = actotal) %>%
  filter(year %in% 1990:2015) %>%
  
   select(-code) %>%
  # Join in current ISO3C codes
  left_join(by = "country",
            y = wb_countries() %>%
              select(country, code = iso3c)) %>%
  # Fix mistaken codes
  mutate(code = if_else(
    condition = is.na(code),
    true = country %>% dplyr::recode(
      "Algeria" = "DZA",
      "Bosnia" = "BIH",
      "Cape Verde" = "CPV",
      "Congo-Brazzaville" = "COG",
      "Congo Brazzaville" = "COG",
      "Czechoslovakia" = "CZE",
      "Egypt" = "EGY",
      "Timor Leste" = "TLS",
      "Gambia" = "GMB",
      "Germany West" = "DEU",
      "Iran" = "IRN",
      "Cote D'Ivoire" = "CIV",
      "Ivory Coast" = "CIV",
      "Kyrgyzstan" = "KGZ",
      "Laos" = "LAO",
      "Macedonia" = "MKD",
      "Myanmar (Burma)" = "MMR",
      "Korea North" = "PRK",
      "Korea South" = "KOR",
      "Russia" = "RUS",
      "Sudan-North" = "SDN",
      "Syria" = "SYR",
      "Taiwan" = "TWN",
      "UAE" = "ARE",
      "Venezuela" = "VEN",
      "Yemen" = "YEM",
      "Serbia and Montenegro" = "SRB",
      "Congo Kinshasa" = "COD"),
    false = code)) %>%
  
  saveRDS("raw_data/violence.rds")
```

## 1.7 Equity Scores

```{r}
read_excel("raw_data/Equity and New RE Scores.xlsx") %>% 
  rename(country = Nation) %>%
  pivot_longer(
    cols = -country,
    names_to = "year",
    values_to = "equity") %>%
  mutate(year = year %>% str_extract(pattern = "[0-9]{4}") %>% as.numeric) %>%
  saveRDS("raw_data/equity.rds")
```

## 1.8 Policies

```{r, eval = FALSE}
# Next, we webscrape from IRENA's dataset
# the dataset of all policies recorded over time fitting the following criteria
# Reneable Energy Policies
# At the National Level
# In the Electricity or Generation Sectors

# Let's load our webscraping packages
library(tidyverse)
library(rvest)
library(xml2)


# Extract table of policy links
get_links = function(url){
  # Paste the front part of the link
  paste("https://www.iea.org",
        # Followed by the unique part of the link,
        # which we obtain by reading the html code for that page,
        read_html(url) %>%
          #html_node("body") %>%
        # And grabbing every time they mention the policy link
          xml_find_all("//a[contains(@class, 'm-policy-listing-item__link')]") %>%
          html_attr("href"),
        sep = "") %>%
    return()
}

# The following code might take a minute:
# For each of the ~40 pages of links
paste("https://www.iea.org/policies?jurisdiction=National&sector=Generation%2CElectricity&topic=Renewable%20Energy&page=", 1:40, sep = "") %>%
  # Run this function, and return as a list
  lapply(get_links) %>%
  # Convert each list item to a data.frame
  lapply(as.data.frame) %>%
  # Bind together
  bind_rows() %>%
  # and give a name
  rename(link = 1) %>%
  # remove rownames to reduce filesize and annoyances
  tibble::remove_rownames() %>%
  # save to file
  saveRDS("raw_data/iea_policy_links.rds")
```

```{r, eval = FALSE}
# Import the policy links
out <- read_rds("raw_data/iea_policy_links.rds") %>%
  # create an id
  mutate(id = 1:n()) %>%
  # filter to just policies with actual links (only 5 mistaken entries)
  filter(link %>% str_detect("policies"))

get_policies = function(data){

  # Download html code of that policy based on its url
  dat <- data$link %>%
    read_html()
  
  # Get policy name
  name <- dat %>%
    xml_find_all("//h1[contains(@class, 'o-hero-freepage__title f-title-3')]") %>%
    html_text()
  # Get meta data
  meta <- dat %>%
    xml_find_all("//span[contains(@class, 'o-hero-freepage__meta')]") %>%
    html_text()
  
  # get country, year, whether in force, and level
  country <- dat %>%
    xml_find_all("//span[contains(@class, 'o-policy-aside-item__value')]") %>%
    html_text()
  
  # Get the description, as well as the topic and sector
  desc <- dat %>%
    xml_find_all("//div[contains(@class, 'm-block__content f-rte f-rte--block')]") %>%
    html_text() %>%
    # Split into a new cell whenever you see Topics or Sector
    str_split(pattern = "Topics\n|Sectors\n", simplify = TRUE) %>%
    as.vector() %>%
    # Remove frill
    str_remove_all("\n") %>% 
    str_remove_all("Remove Filter") %>%
    str_trim()
  
  # Combine name, meta, country, desc
  data.frame(policy = name,
             source = meta[1],
             updated = meta[2],
             country = country[1],
             year = country[2],
             status = country[3],
             jurisdiction = country[4],
             description = desc[1],
             topics = desc[2],
             sectors = desc[3],
             link = data$link) %>%
    return()

}


tracker = function(data){
  # Finally, let's calculate a completion bar
  print(data$id)
  return(data)
}


out %>%
  split(.$id) %>%
  map_dfr(~tracker(.) %>% get_policies(), .id = "id") %>%
  saveRDS("raw_data/iea_policy_dataset.rds")

remove(out)
```

```{r}

ids <- expand_grid(country = read_rds("raw_data/iea_policy_dataset.rds")$country %>% unique()) %>%
  left_join(by = "country",
            y = wb_countries() %>% select(country, code = iso3c)) %>%
  mutate(code = case_when(
    country == "Korea" ~ "KOR",
    country == "People's Republic of China" ~ "CHN",
    country == "United Republic of Tanzania" ~ "TZA",
    country == "Viet Nam" ~ "VNM",
    country == "Lao People's Democratic Republic" ~ "LAO",
    country == "Islamic Republic Of Iran" ~ "IRN",
    country == "Bolivarian Republic Of Venezuela" ~ "BOL",
    country == "Antigua And Barbuda" ~ "ATG",
    country == "United Republic Of Tanzania" ~ "TZA",
    country == "Saint Vincent And The Grenadines" ~ "VCT",
    country == "Yemen" ~ "YEM",
    country == "Taiwan" ~ "TWN",
      TRUE ~ code)) %>%
  # Remove Scotland, because it is not an autonomous nation-state
  filter(code != "Scotland")

# Get list of policies
policies <- read_rds("raw_data/iea_policy_dataset.rds") %>%
  # Data cleaning
  mutate(policy = policy %>% str_remove_all("\t|\r|\n")) %>%
  # To approximate the length of each, policy, we're going to use the date-last-updated
  mutate(end = if_else(status == "Ended", 
                       true = updated %>% 
                         str_extract(pattern = "[0-9]{4}") %>% 
                         as.numeric(),
                       false = NA_real_),
         year = as.numeric(year)) 
#
policies  %>% head()
```


```{r}

policies <- policies %>%
  # Join country codes to policies
  left_join(by = "country", y = ids) %>%
  # Exclude policies attributed to subnational jurisdictions
  filter(!is.na(code)) %>% 
  # Now just grab the country, the year the policy started, the year it presumably ended,
  # the topics and sectors, and 
  select(policy_id = id, code, year, end, topics, sectors) %>%
  # Code dataset for key outcomes of interest
  # Let's use Hood and Margett's (2007) typology for policy toolkits
  # Nodality
  # - Information and education
  # Authority
  # - Codes and standards
  # - Regulatory instruments
  # - Minimum energy performance standard
  # Treasure
  # - Feed-in tariffs/premiums,
  # - Economic instruments
# - Fiscal/financial incentives
# - Grants/subsidy
# - Tax relief
# Organization
# - Research, development, and deployment
# - Strategic Planning
mutate(
  # Code FIT
  policy_fit = if_else(str_detect(topics, pattern = "Feed-in tariffs"), 1, 0),
  # Code basic NATO typology
  policy_nodality = if_else(str_detect(topics, pattern = "Information and education"), 1,0),
  policy_authority = if_else(
    condition = str_detect(
      topics, 
      pattern = paste(c("Codes and standards", 
                        "Regulatory instruments",
                        "Minimum energy performance standard"),
                      collapse = "|")),
    true = 1, false = 0),
  policy_treasure = if_else(
    condition = str_detect(
      topics, pattern = paste(
        c("Feed-in tariffs/premiums",
          "Market-based instruments",
          "Economic instruments",
          "Fiscal/financial incentives",
          "Grants/subsidy",
          "Tax relief"), collapse = "|")),
    true = 1, false = 0),
  policy_organization = if_else(
    condition = str_detect(
      topics, pattern = paste(c("Research, development, and deployment",
                                "Strategic Planning"),
                              collapse = "|")),
    true = 1, false = 0))

# Having coded our policies, we need to bend these out into an edgelist 
# of exactly which country-years there were

get_policy_list = function(data){
  expand_grid(code = data$code,
              year = seq(from = data$year, 
                         to = if_else(!is.na(data$end), data$end, 2020))) %>%
    return()
}

# Generate the list of countries
policies %>%
  split(.$policy_id) %>%
  map_dfr(~get_policy_list(.), .id = "policy_id") %>%
  # Now left_join in the traits we coded
  left_join(by = c("policy_id", "code"),
            y = policies %>%
              select(policy_id, code, 
                     policy_fit, policy_nodality, policy_authority, 
                     policy_treasure, policy_organization)) %>%
  # And also specify that each of these is a policy, just in case!
  mutate(policy = 1) %>%
  saveRDS("raw_data/iea_policy_list.rds")


remove(policies, get_policies)

# Finally, tally up *how many* of each type of policy were identified
# for each country-year
read_rds("raw_data/iea_policy_list.rds") %>%
  group_by(code, year) %>%
  summarize(
    policy_any = sum(policy, na.rm = TRUE),
    policy_fit = sum(policy_fit, na.rm = TRUE),
    policy_nodality = sum(policy_nodality, na.rm = TRUE),
    policy_authority = sum(policy_authority, na.rm = TRUE),
    policy_treasure = sum(policy_authority, na.rm = TRUE),
    policy_organization = sum(policy_organization, na.rm = TRUE)) %>%
  saveRDS("raw_data/iea_policy_tally.rds")
```

```{r}
wb_countries() %>% 
  select(country, code = iso3c) %>%
  filter(str_detect(country, pattern = "Yemen"))
#"Scotland" = "GBR" # We're only looking at national policies,
# and Scotland isn't currently a fully-fledged state
```


## 1.9 Other Measures

Healthy Life Years at Birth

```{r}
# Search for more variables here
# Next, I also collected Healthy Life Years at Birth
#https://apps.who.int/gho/data/node.main.688

dat <- read_csv("raw_data/who_healthy_life_years.csv") %>% 
  select(country = 1, year = 2,
    life_expectancy = `Life expectancy at birth (years)`,
    hale = `Healthy life expectancy (HALE) at birth (years)`) %>%
  slice(-1) %>%
  mutate_at(vars(life_expectancy, hale), funs(as.numeric(.))) 

# They are extremely strongly correlated (r = 0.99)!
dat %>%
  summarize(cor = cor(life_expectancy, hale,
                      use = "pairwise.complete.obs"))
# This means we can just use life expectancy at birth as a predictor,
# which is available for every year, I believe, while health life expectancy is not.

remove(dat)
```

## 1.10 Human Development Index

```{r}
# Download Human Development Index 
#(which for some reason is not currently easily accessible by wb_stat.)
#http://hdr.undp.org/en/indicators/137506#


dat <- wb_countries() %>%
  select(code = iso3c, country)


read_csv("raw_data/human_development_index.csv", skip = 5) %>% 
  select(country = Country, matches("[0-9]{4}")) %>%
  pivot_longer(cols = matches("[0-9]{4}"), 
               names_to = "year", values_to = "hdi") %>%
  mutate_at(vars(year, hdi), funs(as.numeric(.))) %>%
  filter(!is.na(hdi)) %>%
  # Let's bind in Taiwan's scores too
  bind_rows(
    # No HDI for TAIWAN!
    # Download approximation from new Nature Paper
    #Smits, J & I. Permanyer (2019). The Subnational Human Development Database. (Nature) Scientific Data, 6, 190038. Online version available here..
    
    # https://globaldatalab.org/shdi/
    read_csv("raw_data/GDL-Sub-national-HDI-data.csv") %>%
      select(Country, ISO_Code, Level, Region, matches("[0-9]{4}")) %>%
      filter(str_detect(Region, "Taiwan")) %>%
      pivot_longer(cols = matches("[0-9]{4}"),
                   names_to = "year", values_to = "hdi") %>%
      select(country = Region, year, hdi) %>%
      mutate_at(vars(year, hdi), funs(as.numeric(.)))) %>%
  # Join in codes
  left_join(by = "country", y = dat) %>%
  # Fix codes that did not transfer
  mutate(code = case_when(
     country == "Korea (Republic of)" ~ "KOR",
    country == "People's Republic of China" ~ "CHN",
    country == "United Republic of Tanzania" ~ "TZA",
    country == "Viet Nam" ~ "VNM",
    country == "Lao People's Democratic Republic" ~ "LAO",
    country == "Iran (Islamic Republic of)" ~ "IRN",
    country == "Venezuela (Bolivarian Republic of)" ~ "VEN",
    country == "Antigua And Barbuda" ~ "ATG",
    country == "Tanzania (United Republic of)" ~ "TZA",
    country == "Saint Vincent and the Grenadines" ~ "VCT",
    country == "Yemen" ~ "YEM",
    country == "Taiwan" ~ "TWN",
    country == "Bahamas" ~ "BHS",
    country == "Bolivia (Plurinational State of)" ~ "BOL",
    country == "Congo (Democratic Republic of the)" ~ "COD",
    country == "Congo" ~ "COG",
    country == "Czechia" ~ "CZE",
    str_detect(country, "Ivoire") ~ "CIV",
    country == "Egypt" ~ "EGY",
    country == "Eswatini (Kingdom of)" ~ "SWZ",
    country == "Gambia" ~ "GMB",
    country == "Hong Kong, China (SAR)" ~ "HKG",
    country == "Kyrgyzstan" ~ "KGZ",
    country == "Micronesia (Federated States of)" ~"FSM",
    country == "Saint Kitts and Nevis" ~ "KNA",
    country == "Saint Lucia" ~ "LCA",
    country == "Slovakia" ~ "SVK",
    country == "Moldova (Republic of)" ~ "MDA",
    TRUE ~ code)) %>%
  distinct() %>%
  saveRDS("raw_data/hdi.rds")


read_rds("raw_data/hdi.rds") %>%
  distinct()
  filter(is.na(code)) %>%
  select(country) %>%
  distinct()

#dat %>%
#  filter(str_detect(country, "Vene"))
remove(dat)
```



## 1.11 Renewable Energy Data

```{r, message = FALSE, warning = FALSE}
read_csv("raw_data/renworldbes.csv", skip = 1) %>%
  select(country = COUNTRY,
         year = TIME,
         hydro_iea = Hydro,
         wind_iea = Wind,
         tidal_iea = `Tide, wave and ocean`,
         solar_iea = `Solar photovoltaics`,
         solar_thermal_iea = `Solar thermal (direct use in TJ-net)`,
         geothermal_iea = `Geothermal (direct use in TJ-net)`)  %>%
  mutate_at(vars(hydro_iea:geothermal_iea), funs(as.numeric(.))) %>%
  # pivot longer
  #pivot_longer(
  #  cols = c(hydro, wind, tidal, solar, solar_thermal, geothermal),
  #  names_to = "type",
  #  values_to = "gwh") %>%
  # Join in ISO3 codes
  left_join(by = c("country"),
            y = wb_countries() %>%
              select(code = iso3c, country)) %>%
  mutate(code = case_when(
    country == "People's Republic of China" ~ "CHN",
    country == "United Republic of Tanzania" ~ "TZA",
    country == "Viet Nam" ~ "VNM",
    country == "Lao People's Democratic Republic" ~ "LAO",
    country == "Islamic Republic Of Iran" ~ "IRN",
    country == "Bolivarian Republic Of Venezuela" ~ "BOL",
    country == "Antigua And Barbuda" ~ "ATG",
    country == "United Republic Of Tanzania" ~ "TZA",
    country == "Saint Vincent And The Grenadines" ~ "VCT",
    country == "Yemen" ~ "YEM",
    country == "Taiwan" ~ "TWN",
    country == "Democratic Republic of the Congo" ~ 	"COD",
    str_detect(country, "Ivoire") ~ "CIV",
    country == "Egypt" ~ "EGY",
    str_detect(country, "Greece") ~ "GRC",
    country == "Islamic Republic of Iran" ~ "IRN",
    country == "Korea" ~ "KOR",
    country == "Republic of North Macedonia" ~ "MKD",
    str_detect(country, "Norway") ~ "NOR",
    country == "Russia" ~ "RUS",
    country == "Republic of Moldova" ~ "MDA",
    TRUE ~ code)) %>%
  # save to file
  saveRDS("raw_data/renewables.rds")

```

## 1.12 Combine

```{r, message= FALSE, warning = FALSE}
# Using the World Bank's list,
# let's grab all the countries in the world
wb <- wb_countries() %>%
  select(code = iso3c, country, income_level, region)

read_rds("raw_data/equity.rds") %>%
  # Join in the ISO-3C country codes, regions, and income levels
  # by country name
  left_join(by = c("country"), y = wb) %>%
  # Join in all world bank data
  left_join(by = c("code", "year"),
            y = read_rds("raw_data/worldbank_output.rds") %>%
              select(-iso2c, -country) %>%
              rename(code = iso3c, year = date)) %>%
  # Import rural and urban pop
  left_join(by = c("code", "year"),
            y = read_rds("raw_data/demographics.rds")) %>%
  # Join in renewable energy data
  left_join(by = c("code", "year"),
            y = read_rds("raw_data/renewables.rds") %>%
              select(-country)) %>%
  # Join in Human Development Index data
  left_join(by = c("code", "year"),
            y = read_rds("raw_data/hdi.rds") %>%
              select(-country)) %>%
  # Join in median age
  left_join(by = c("code", "year"),
            y = read_rds("raw_data/median_age_bracket.rds") %>%
              ungroup() %>%
              select(-country)) %>% 
  # Join in World Governance Indicators
  left_join(by = c("code", "year"), 
            y = read_rds("raw_data/wgi.rds") %>% 
              filter(measure == "Estimate") %>%
              select(-country, -measure) %>%
              pivot_wider(
                id_cols = c(code, year),
                names_from = type,
                values_from = value)) %>%
  # Join in Polity 5 score
  left_join(by = c("code", "year"),
            y = read_rds("raw_data/polity.rds")) %>%
  # Join in voter turnout
  left_join(by = c("code", "year"),
            y = read_rds("raw_data/voter_turnout.rds") %>%
              select(-country)) %>%
  # Join in index of Major Episodes of Political Violence
  left_join(by = c("code", "year"),
            y = read_rds("raw_data/violence.rds") %>% 
              select(code, year, violence_mepg = violence)) %>%
  # Join in tally of how many RE policies were in effect each year
  left_join(by = c("code", "year"),
            y = read_rds("raw_data/iea_policy_tally.rds")) %>%
  # Fill in any missing data points for policy variables with zero,
  # Because if they were not listed on IEA policy dataset,
  # we are assuming they don't exist or
  # aren't significant enough to be mentioned.
  mutate_at(vars(policy_any, policy_fit, policy_nodality,
                 policy_authority,policy_treasure, policy_organization),
            funs(if_else(is.na(.), 0, .))) %>%
  # Save result as a raw-dataset, as in we haven't adjusted any missing data yet
  saveRDS("raw_data/raw_dataset.rds")
```


## 1.10 Missing Data


```{r}
dat <- read_rds("raw_data/raw_dataset.rds") %>%
  pivot_longer(
    cols = -c(code, country, year, income_level, region),
    names_to = "measure",
    values_to = "value")



# Let's identify the variable-years where our countries 
# are missing less than 5% of missing data
# These are the variable-years that we will use in our analysis.
myvaryears <- dat %>% 
  group_by(year, measure) %>%
  summarize(missing = sum(is.na(value)) / n()) %>%
  ungroup() %>%
  filter(missing < 0.05) %>%
  mutate(variable = paste(measure, year, sep = "-")) %>%
  ungroup() %>%
  select(variable) %>%
  unlist() %>% 
  unname()

# Now let's identify the unique variables, 
# where at least 95% countries had full data 
# for at least a single year in our period
myvars <- myvaryears %>% 
  str_remove("[-][0-9]{4}") %>%
  unique()  


# Zoom into just the variables where at least one year has 95% of data
dat %>%
  # Filter to just vars where we ever have at least 95% of data
  filter(measure %in% myvars) %>%
  
  # Now if the value is for one of our "good" variable-years (95% data+),
  # Let's fill in missing data with the mean;
  # otherwise leave as is
  group_by(year, measure) %>%
  mutate(variable = paste(measure, year, sep = "-")) %>%
  mutate(value = if_else(
    # If that variable-year is missing less than 5% of cases,
    condition = sum(is.na(value)) / n() < 0.05 & 
      # and the cell is missing, then...
                           is.na(value),
    # Impute the mean
                         true = mean(value, na.rm = TRUE), 
    # Or don't
    false = value)) %>%
  ungroup() %>%
  # Now fill in missing data in "bad" years with
  # original and mean data from good years
  # Arrange by year
  group_by(code, measure) %>%
  arrange(measure, code, year) %>%
  # Now fill in missing data (eg. 2008) 
  # with the most recent data available (eg. 2005),
  # and then go back to any values PRIOR (eg. 2000) 
  # to the most recent data available ,
  # and fill those in with that point (eg. 2005).
  fill(value, .direction = "downup") %>%
  ungroup() %>%
  distinct() %>%
  # Now pivot back out
  pivot_wider(
    id_cols = c(code, country, year, income_level, region),
    names_from = measure,
    values_from = value) %>%
  saveRDS("data/dataset_filled.rds")



remove(dat)

```


## 1.11 Output

```{r}
read_rds("data/dataset_filled.rds") %>%
  write_csv("data/dataset_filled.csv")

read_rds("raw_data/raw_dataset.rds") %>%
  write_csv("raw_data/dataset_raw.csv")

```


# 2. Descriptives

First, let's generate some descriptive statistics about our indices.

## 2.1 Equity by Income

First, we will visualize our social equity index scores compared by World Bank income group classifications. We can use violin plots to this, which very nicely show the distribution of social equity across each of the four income groups.

```{r}
# IBM Colorblind friendly scale
# https://davidmathlogic.com/colorblind/#%23648FFF-%23785EF0-%23DC267F-%23FE6100-%23FFB000
mycolors <- c("#648FFF", "#785EF0", "#DC267F", "#FE6100", "#FFB000")

read_rds("data/dataset_filled.rds") %>%
  mutate(income_level = income_level %>% recode_factor(
    "High income" = "High",
    "Upper middle income" = "Upper-Middle",
    "Lower middle income" = "Lower-Middle",
    "Low income" = "Low")) %>%
  ggplot(mapping = aes(x = income_level, y = equity, 
                       fill = income_level, color = income_level)) +
  geom_jitter(alpha = 0.25) +
  geom_violin(alpha = 0.75, color = "black") +
  geom_point(data = . %>% 
               group_by(income_level) %>%
               summarize(equity = median(equity, na.rm = TRUE)),
             mapping = aes(x = income_level, y = equity,
                           color = income_level),
             size = 3, color = "black") +
    geom_point(data = . %>% 
               group_by(income_level) %>%
               summarize(equity = median(equity, na.rm = TRUE)),
             mapping = aes(x = income_level, y = equity,
                           color = income_level),
             size = 2, color = "white") +
  scale_fill_manual(values = mycolors[c(1,2,3,5)]) +
  scale_color_manual(values = mycolors[c(1,2,3,5)]) +
  labs(x = "World Bank Income Classification", 
       y = "Social Equity Index Score",
       caption = "Violins depict distributions, with white circles as median values.") +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        plot.caption = element_text(hjust = 0.5)) +
  guides(fill = FALSE, color = FALSE) +
  ggsave("viz/equity_violins.png", width = 6, height = 4, dpi = 500)
```

## 2.2 Bivariate Correlations

Second, let's generate some bivariate scatterplots, all appended together in one chart, depicting the relationship between each raw covariate in our study and the social equity index score.

```{r}
library(tidyverse)

dat <- read_rds("data/dataset_filled.rds") %>%
  mutate(ratio_rural_urban = pop_rural / pop_urban) %>%
  mutate(policy_nonfit = policy_any - policy_fit) %>%
  select(country, year, equity, renewables, biomass, hydro, nuclear,
         solar_iea, wind_iea, tidal_iea, geothermal_iea, #hydro_iea,
         policy_any, policy_fit, policy_nonfit,
         government_effectiveness, polity2, gdp_capita,
         unemployment, hdi,# emissions_ghg, 
         oil_rents,fossil, pop, ratio_rural_urban, age) %>%
  pivot_longer(cols = -c(country, year, equity), names_to = "covariate", values_to = "value")  %>%
  # Classify into groups
  mutate(class = case_when(
    str_detect(covariate, "renewables|_iea|biomass|hydro|nuclear") ~ "Path Dependence",
    str_detect(covariate, "oil|fossil") ~ "Resource/Development Curses",
    str_detect(covariate, "policy") ~ "Policy Toolkits",
    str_detect(covariate, "govern|polity") ~ "Governance",
    TRUE ~ "Development")) %>%
  # For each Covariate, rescale into Z-scores
  group_by(covariate) %>%
  mutate(value = scale(value)) %>%
  ungroup() %>%
  mutate(covariate = covariate %>% recode_factor(
    "renewables" = "Renewable\nEnergy",
    "solar_iea" = "Solar PV",
    "wind_iea" = "Wind",
    "geothermal_iea" = "Geothermal",
    "tidal_iea" = "Tidal",
    "biomass" = "Biomass",
    "hydro" = "Hydro",
    "nuclear" = "Nuclear",
        # Resource Curse
    "oil_rents" = "Oil Rents\n(% of GDP)",
    "fossil" = "Fossil\nFuels",
    # Policies
    "policy_any"= "Policies:\nAny",
    "policy_fit" = "Policies:\nFeed-in Tariff",
    "policy_nonfit" = "Policies:\nNon-Feed-in Tariff",
    # Governance
    "government_effectiveness" = "Government\nEffectiveness",
    "polity2" = "Strength\nof Democracy",
    # Development
    "gdp_capita" = "GDP\nper capita",
    "unemployment" = "Unemployment\nRate",
    "hdi" = "Human\nDevelopment\nIndex",
    "age" = "Median Age\nBracket\n(5 years)",
    "ratio_rural_urban" = "Ratio of Rural\nto Urban\nResidents",
    "pop" = "Population")) 

mycolors <- c("#648FFF", "#785EF0", "#DC267F", "#FE6100", "#FFB000")

dat %>%
  ggplot(mapping = aes(x = value, y = equity, color = class)) +
  geom_jitter(alpha = 0.5) +
  geom_smooth(method = "lm", color = "black") +
  facet_wrap(~covariate, scales = "free_x", ncol = 7) +
  theme_classic(base_size = 15) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm"),
        legend.position = "bottom") +
  scale_color_manual(values = mycolors) +
  labs(x = "Covariate Value (Z-Score)",
       y = "Social Equity Index Score", color = "Type") +
  ggsave("viz/figure_5.png", width = 12, height = 8, dpi = 500)
```



# 3. Models

Next, we're going to model the effect of our covariates on social equity index scores, assessed over time. Since this is a panel dataset, we need fixed and random effects models to account for the effect of time. As shown below in Hausman tests, fixed effects consistently work better than random effects; statistically significant Hausman test statistics indicate this. Additionally, random effects work rather poorly, frequently causing singularity problems because of so little variation. There's also a theoretical reason why we would want fixed effects more; we really want to directly account for changing dynamics for each year, and each year might really have a specific effect (like the post-Kyoto Protocol years, for example). In these kinds of cases, fixed effects work better both based on Hausman tests and theoretical reasons.

## 3.1 Basic Models

```{r}
library(tidyverse)
library(plm)
library(ggeffects)

# Import Data
dat <- read_rds("data/dataset_filled.rds") %>%
  mutate(ratio_rural_urban = pop_rural / pop_urban) %>%
  # Let's take the reciprocal transformation of HDI 
  # (which basically is inverse HDI )
  # the higher inverse HDI, the lower real HDI
  mutate(hdi = 1/ hdi) %>%
  mutate(government_effectiveness = scales::rescale(government_effectiveness, to = c(1,10)) %>% log() ) %>%
  mutate(gdp_capita = gdp_capita ^2) %>%
  # Break association between hydro and fossil fuels, by cubing hydro
  mutate(hydro = hydro^3) %>%
  mutate(year = factor(year),
         income_levels = factor(income_level),
         region = factor(region),
         country = factor(country)) %>%
  mutate(policy_nonfit = policy_any - policy_fit) %>%
  # Turn all tallies of GWh of energy consumption into per capita measures
  mutate_at(vars(contains("_iea")), funs(. / pop * 100000)) %>%
    # Now transform population to avoid colinearity
  mutate(pop = log(pop)) %>%
  # rescale for easier interpretations
  mutate_at(vars(renewables, biomass, hydro, nuclear,
                 solar_iea, wind_iea, tidal_iea, geothermal_iea, hydro_iea,
                 policy_any, policy_fit, policy_nonfit,
                 policy_treasure, policy_organization,
                 policy_nodality, policy_authority,
                 government_effectiveness, polity2, gdp_capita,
                 unemployment, hdi,# emissions_ghg, 
                 oil_rents,fossil, pop, ratio_rural_urban, age),
            funs(scale(.) %>% as.numeric()))

#dat %>%
#  select(income_level, country) %>%
#  distinct() %>%
#  group_by(income_level) %>%
#  summarize(count = n())


# Let's run our simplest models as both fixed and random effects,
# and check the hausman test statistic
m1c <- dat %>%
  plm::plm(formula = equity ~ renewables + hydro + nuclear + biomass + 
             policy_any +  government_effectiveness + polity2 +
             gdp_capita + unemployment + hdi + oil_rents + fossil + 
             pop + ratio_rural_urban + age, 
           model = "within", effect = "time", index = c("country", "year"))
m1d <- dat %>%
  plm::plm(formula = equity ~ renewables + hydro + nuclear + biomass + 
             policy_any +  government_effectiveness + polity2 +
             gdp_capita + unemployment + hdi + oil_rents + fossil + 
             pop + ratio_rural_urban + age, 
           model = "random", effect = "time", index = c("country", "year"))
# Looks like the Hausman test is statistically significant,
# meaning that fixed effects are a better fit here.
# I think conceptually, they area better fit here too,
# since we're trying to directly account for variation over time
phtest(m1c, m1d)


m2c <- dat %>%
  plm::plm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age,
     model = "within", effect = "time", index = c("country", "year"))
m2d <- dat %>%
  plm::plm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age,
     model = "random", effect = "time", index = c("country", "year"))

phtest(m2c,m2d)
# Looks like the Hausman test is statistically significant,
# meaning that fixed effects are a better fit here.
# I think conceptually, they area better fit here too,
# since we're trying to directly account for variation over time


# Let's go ahead and report just the basic fixed effects models.
# For random effects, reviewers can refer to the replication code;
# they're basically identical anyways.

remove(m1c,m1d, m2c,m2d)

# Fixed effects.
#m1c %>% summary()
m1a <- dat %>%
  lm(formula = equity ~ renewables + hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))

m2a <- dat %>%
  lm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))

m3a <- dat %>%
  lm(formula = equity ~ renewables +
             hydro + nuclear + biomass +
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))

m4a <- dat %>%
  lm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro + nuclear + biomass + 
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
# Pretty solid VIF!
m4a %>% car::vif()

# Let's make a table
texreg::htmlreg(
  list(m1a,m2a,m3a,m4a),
  file = "table/table_1A.html",
  caption.above = TRUE,
  caption = "<b>OLS Panel Models of Social Equity Scores</b><br><i>for each of 99 Countries over 26 Years, with Annual Fixed Effects",
  custom.header = list("Any RE Promotion Policy" = 1:2, 
                       "RE Promotion Policies by Type" = 3:4),
  custom.model.names = c("RE Overall",
                         "RE by Type",
                         "RE Overall",
                         "RE by Type"),
  custom.note = "*** p < 0.001, ** p < 0.01, * p < 0.05, . p < 10. Each model includes fixed effects by year. Statistically significant Hausman tests found that fixed effects fit better than random effects. All coefficients represent the change in equity scores given a 1 standard deviation increase in the predictor.",
  stars = c(0.001, 0.01, 0.05, 0.10),
  bold = 0.10,
  single.row = TRUE,
  custom.coef.map = list(
    # Technologies
    "renewables" = "Renewable Energy",
    "solar_iea" = "Solar PV",
    "wind_iea" = "Wind",
    "geothermal_iea" = "Geothermal",
    "tidal_iea" = "Tidal",
    "biomass" = "Biomass",
    "hydro" = "Hydro (cubed)",
    "nuclear" = "Nuclear",
        # Resource Curse
        "oil_rents" = "Oil Rents (% of GDP)",
    "fossil" = "Fossil Fuels",
    # Policies
    "policy_any"= "Policies: Any",
    "policy_fit" = "Policies: Feed-in Tariff",
    "policy_nonfit" = "Policies: Non-Feed-in Tariff",
    # Governance
    "government_effectiveness" = "Government Effectiveness (log)",
    "polity2" = "Strength of Democracy",
    # Development
    "gdp_capita" = "GDP per capita (squared)",
    "unemployment" = "Unemployment Rate",
    "hdi" = "Human Development Index (reciprocal)",
    "age" = "Median Age Bracket (5 years)",
    "ratio_rural_urban" = "Ratio of Rural to Urban Residents",
    "pop" = "Population (log)",
    "income_levelUpper middle income" = "Income Level: Upper-Middle",
    "income_levelLower middle income" = "Income Level: Lower-Middle",
    "income_levelLow income" = "Income Level: Low",
    "regionSouth Asia" = "South Asia",
    "regionSub-Saharan Africa" = "Sub-Saharan Africa",
    "regionMiddle East & North Africa" = "Middle East & North Africa",              
    "regionEurope & Central Asia" = "Europe & Central Asia",
    "regionLatin America & Caribbean" = "Latin America & Caribbean", 
    "regionNorth America" = "North America", 
    "(Intercept)" = "Constant"),
  groups = list(
    "<b>Path Dependence</b>" = 1:8,
    "<b>Curse of Development</b>" = 9:10,
    "<b>Policy Toolkits</b>" = 11:13,
    "<b>Quality of Governance</b>" = 14:15,
    "<b>Development</b>" = 16:21),
#    "<b>Income Groups</b>" = 22:24),
#    "<b>Geography</b>" = 25:30),
  include.fstat = TRUE,
  custom.gof.rows = list(
    "Mean VIF" =   list(m1a,m2a,m3a,m4a) %>%
  map(~car::vif(.)[,3]^2 %>% mean()) %>%
  unlist())
)
```

## 3.2 Correlations

Let's try our a few correlations too.

```{r}
# That's a pretty compelling endorsement for fixed effects models.

# Life expectancy and HDI are correlated at 0.88; let's use HDI instead
read_rds("data/dataset_filled.rds") %>%  
  summarize(cor = cor(hdi, life_exp, use = "pairwise.complete.obs"))

# Looks like hydro-idea is micro-hydro,
# while hydro is large-scale hydro; they are not correlated at all
read_rds("data/dataset_filled.rds") %>%  
  mutate(hydro_iea = hydro_iea / pop) %>%
  summarize(cor = cor(hydro, hydro_iea, use = "pairwise.complete.obs"))

dat %>%
  summarize(pearson = cor(polity2, policy_fit))

dat %>%
  summarize(pearson = cor(government_effectiveness, policy_fit))

read_rds("data/dataset_filled.rds") %>%  
  #filter(income_level == "High income") %>%
  summarize(pearson = cor(equity, hdi))
read_rds("data/dataset_filled.rds") %>%  
  filter(income_level == "High income") %>%
  summarize(pearson = cor(equity, hdi))
# For example, social equity was strongly correlated with the human development index (r = 0.85), especially among high income countries (r = 0.58)
```

#### Theory Building:

I see several major stories here:

- First, countries adopting more renewables see better equity scores (somewhat with wind), but that effect wants after controlling for regions.

- Second, countries adopting feed-in tariffs see WORSE equity, while those adopting other policies (auctions, RPS, etc.) see better equity.

- Third, capacity improves equity, but it is negatively associated with strength of democracy.

- Fourth, countries more deeply invested in oil, after accounting for fossil fuel burning, see better equity. Countries that burn more fossil fuels see slightly worse equity scores.

- Fifth, high income countries always have higher social equity than lower income countries


## 3.3 Visualization

Finally, let's visualize the overall effects found in these models.

#### Renewable Energy

```{r}
# IBM Colorblind friendly scale
# https://davidmathlogic.com/colorblind/#%23648FFF-%23785EF0-%23DC267F-%23FE6100-%23FFB000
mycolors <- c("#648FFF", "#785EF0", "#DC267F", "#FE6100", "#FFB000")

out <- bind_rows(
  m3a %>%
    ggpredict(terms = c("renewables [-3:3]"), 
              ci.lvl = 0.95, typical = "mean", type = "fe") %>%
    as.data.frame() %>%
    mutate(type = "Renewables"),
  
  m4a %>%
    ggpredict(terms = c("solar_iea [-3:3]"), 
              ci.lvl = 0.95, typical = "mean", type = "fe") %>%
    as.data.frame() %>%
    mutate(type = "Solar PV"),
  
  m4a %>%
    ggpredict(terms = c("wind_iea [-3:3]"), 
              ci.lvl = 0.95, typical = "mean", type = "fe") %>%
    as.data.frame() %>%
    mutate(type = "Wind"),
  
  m4a %>%
    ggpredict(terms = c("geothermal_iea [-3:3]"), 
              ci.lvl = 0.95, typical = "mean", type = "fe") %>%
    as.data.frame() %>%
    mutate(type = "Geothermal"),
  
  m4a %>%
    ggpredict(terms = c("biomass [-3:3]"), 
              ci.lvl = 0.95, typical = "mean", type = "fe") %>%
    as.data.frame() %>%
    mutate(type = "Biomass")) %>%
  mutate(type = factor(type, levels = c("Renewables", "Solar PV", 
                                        "Wind", "Geothermal", "Biomass")))



out %>%
  ggplot(mapping = aes(x = x, y= predicted,
                            ymin = conf.low, ymax = conf.high,
                            fill = type, color =type, group = type)) +
    geom_ribbon(alpha = 0.80) +
    geom_line(alpha = 1) +
#  geom_jitter(alpha = 0.10) +
  facet_wrap(~type, ncol = 5) +
  theme_classic(base_size = 14) +
  theme(panel.spacing = unit(0.5, "cm"),
        panel.border = element_rect(fill = NA, color = "black"),
        plot.caption = element_text(hjust = 0.5)) +
  guides(fill = FALSE, color = FALSE) +
  # Bind 2 standard deviations from the mean
  #xlim(c(-2, 2)) +
  labs(y = "Marginal Effects\non Social Equity Scores", x = "Renewable Energy Technology Adoption (Z-scores)",
       caption = "Lines depict modeled marginal effects with 95% confidence intervals as bands,\n with control variables, nholding all other predictors at their means.") +
  scale_fill_manual(values = rev(mycolors)) +
  scale_color_manual(values = rev(mycolors)) +
  ggsave("viz/figure_1.png", width = 8, height = 4, dpi = 500)
```

#### Feed-in Tariffs

```{r}

out <- bind_rows(
  m4a %>%
    ggpredict(terms = c("policy_fit [-3:3]"), 
              ci.lvl = 0.95, typical = "mean", type = "fe") %>%
    as.data.frame() %>%
    mutate(type = "Feed-in Tariff\nPolicies",
           year = "Overall"),
    
  m4a %>%
    ggpredict(terms = c("policy_fit [-3:3]"), 
              ci.lvl = 0.95, typical = "mean", type = "fe",
              condition = c(Year = 1990)) %>%
    as.data.frame() %>%
    mutate(type = "Feed-in Tariff\nPolicies",
           year = "1990"),
    
  m4a %>%
    ggpredict(terms = c("policy_fit [-3:3]"), 
              ci.lvl = 0.95, typical = "mean", type = "fe",
              condition = c(Year = 2015)) %>%
    as.data.frame() %>%
    mutate(type = "Feed-in Tariff\nPolicies",
           year = "2015"),
  m4a %>%
    ggpredict(terms = c("policy_nonfit [-3:3]"), 
              ci.lvl = 0.95, typical = "mean", type = "fe") %>%
    as.data.frame() %>%
    mutate(type = "Other RE\nPolicies",
           year = "Overall"),
  
  m4a %>%
    ggpredict(terms = c("policy_nonfit [-3:3]"), 
              ci.lvl = 0.95, typical = "mean", type = "fe",
              condition = c(Year = 1990)) %>%
    as.data.frame() %>%
    mutate(type = "Other RE\nPolicies",
           year = "1990"),

  m4a %>%
    ggpredict(terms = c("policy_nonfit [-3:3]"), 
              ci.lvl = 0.95, typical = "mean", type = "fe",
              condition = c(Year = 2015)) %>%
    as.data.frame() %>%
    mutate(type = "Other RE\nPolicies",
           year = "2015"))

out %>%
  filter(year == "Overall") %>%
  ggplot(mapping = aes(x = x, y = predicted, ymin = conf.low, ymax = conf.high, 
                       fill = type, color = type, group = type)) +
  geom_line(show.legend = TRUE) +
  geom_ribbon(alpha = 0.5) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        legend.position = "right") +
  scale_color_manual(values = mycolors[c(1,5)]) +
    scale_fill_manual(values = mycolors[c(1,5)]) +
  labs(x = "# of Renewable Energy Policies\nAdopted (Z-scores)",
       y = "Marginal Effects on\nSocial Equity Scores",
       fill = "Policy Tool") +
  guides(color = FALSE) +
  ggsave("viz/figure_2.png", width = 6, height = 4, dpi = 500)
```

## 4. Models By Income Group

Next, let's visualize each income group separately to show changing dynamics based on countries' gross national income per capita (the metric that the World Bank uses to assign income groups).

#### High

```{r}
# Import Data
dat <- read_rds("data/dataset_filled.rds") %>%
  mutate(ratio_rural_urban = pop_rural / pop_urban) %>%
  # Let's take the reciprocal transformation of HDI 
  # (which basically is inverse HDI )
  # the higher inverse HDI, the lower real HDI
  mutate(hdi = 1/ hdi) %>%
  mutate(government_effectiveness = scales::rescale(government_effectiveness, to = c(1,10)) %>% log() ) %>%
  mutate(gdp_capita = gdp_capita ^2) %>%
  # Break association between hydro and fossil fuels, by cubing hydro
  mutate(hydro = hydro^3) %>%
  mutate(policy_nonfit = policy_any - policy_fit) %>%
  # Turn all tallies of GWh of energy consumption into per capita measures
  mutate_at(vars(contains("_iea")), funs(. / pop * 100000)) %>%
    # Now transform population to avoid colinearity
  mutate(pop = log(pop)) %>%
  # rescale for easier interpretations
  mutate_at(vars(renewables, biomass, hydro, nuclear,
                 solar_iea, wind_iea, tidal_iea, geothermal_iea, hydro_iea,
                 policy_any, policy_fit, policy_nonfit,
                 policy_treasure, policy_organization,
                 policy_nodality, policy_authority,
                 government_effectiveness, polity2, gdp_capita,
                 unemployment, hdi,# emissions_ghg, 
                 oil_rents, fossil, pop, ratio_rural_urban, age),
            funs(scale(.) %>% as.numeric())) %>%
  filter(income_level == "High income") %>%
  mutate(year = factor(year),
         income_levels = factor(income_level),
         region = factor(region),
         country = factor(country)) 


# Let's run our simplest models as both fixed and random effects,
# and check the hausman test statistic
m1c <- dat %>%
  plm::plm(formula = equity ~ renewables + hydro + nuclear + biomass + 
             policy_any +  government_effectiveness + polity2 +
             gdp_capita + unemployment + hdi + oil_rents + fossil + 
             pop + ratio_rural_urban + age, 
           model = "within", effect = "time", index = c("country", "year"))
m1d <- dat %>%
  plm::plm(formula = equity ~ renewables + hydro + nuclear + biomass + 
             policy_any +  government_effectiveness + polity2 +
             gdp_capita + unemployment + hdi + oil_rents + fossil + 
             pop + ratio_rural_urban + age, 
           model = "random", effect = "time", index = c("country", "year"))
# Looks like the Hausman test is statistically significant,
# meaning that fixed effects are a better fit here.
# I think conceptually, they area better fit here too,
# since we're trying to directly account for variation over time
phtest(m1c, m1d)

remove(m1c,m1d)


m1a <- dat %>%
  lm(formula = equity ~ renewables + hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
#(m1a %>% car::vif())^2

m2a <- dat %>%
  lm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))

m3a <- dat %>%
  lm(formula = equity ~ renewables +
             hydro + nuclear + biomass +
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))

m4a <- dat %>%
  lm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro + nuclear + biomass + 
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
m4b <- dat %>%
  lme4::lmer(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro + nuclear + biomass + 
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban  + age + (1 |year))
# Pretty solid VIF!
(m4a %>% car::vif())^2


# Let's make a table
texreg::htmlreg(
   list(m1a,m2a,m3a,m4a),
  file = "table/table_2A.html",
  caption.above = TRUE,
  caption = "<b>OLS Panel Models of Social Equity Scores for High Income Countries</b><br><i>for each of 40 Countries over 26 Years, with Annual Fixed Effects",
  custom.header = list("Any RE Promotion Policy" = 1:2, 
                       "RE Promotion Policies by Type" = 3:4),
  custom.model.names = c("RE Overall",
                         "RE by Type",
                         "RE Overall",
                         "RE by Type"),
  custom.note = "*** p < 0.001, ** p < 0.01, * p < 0.05, . p < 10. Each model includes fixed effects by year. Statistically significant Hausman tests found that fixed effects fit better than random effects. All coefficients represent the change in equity scores given a 1 standard deviation increase in the predictor.",
  stars = c(0.001, 0.01, 0.05, 0.10),
  bold = 0.10,
  single.row = TRUE,
  custom.coef.map = list(
    # Technologies
    "renewables" = "Renewable Energy",
    "solar_iea" = "Solar PV",
    "wind_iea" = "Wind",
    "geothermal_iea" = "Geothermal",
    "tidal_iea" = "Tidal",
    "biomass" = "Biomass",
    "hydro" = "Hydro (cubed)",
    "nuclear" = "Nuclear",
        # Resource Curse
        "oil_rents" = "Oil Rents (% of GDP)",
    "fossil" = "Fossil Fuels",
    # Policies
    "policy_any"= "Policies: Any",
    "policy_fit" = "Policies: Feed-in Tariff",
    "policy_nonfit" = "Policies: Non-Feed-in Tariff",
    # Governance
    "government_effectiveness" = "Government Effectiveness (log)",
    "polity2" = "Strength of Democracy",
    # Development
    "gdp_capita" = "GDP per capita (squared)",
    "unemployment" = "Unemployment Rate",
    "hdi" = "Human Development Index (reciprocal)",
    "age" = "Median Age Bracket (5 years)",
    "ratio_rural_urban" = "Ratio of Rural to Urban Residents",
    "pop" = "Population (log)",
    "income_levelUpper middle income" = "Income Level: Upper-Middle",
    "income_levelLower middle income" = "Income Level: Lower-Middle",
    "income_levelLow income" = "Income Level: Low",
    "regionSouth Asia" = "South Asia",
    "regionSub-Saharan Africa" = "Sub-Saharan Africa",
    "regionMiddle East & North Africa" = "Middle East & North Africa",              
    "regionEurope & Central Asia" = "Europe & Central Asia",
    "regionLatin America & Caribbean" = "Latin America & Caribbean", 
    "regionNorth America" = "North America", 
    "(Intercept)" = "Constant"),
  groups = list(
    "<b>Path Dependence</b>" = 1:8,
    "<b>Curse of Development</b>" = 9:10,
    "<b>Policy Toolkits</b>" = 11:13,
    "<b>Quality of Governance</b>" = 14:15,
    "<b>Development</b>" = 16:21),
#    "<b>Income Groups</b>" = 22:24),
#    "<b>Geography</b>" = 25:30),
  include.fstat = TRUE,
  custom.gof.rows = list(
    "Mean VIF" =   list(m1a,m2a,m3a,m4a) %>%
  map(~car::vif(.)[,3]^2 %>% mean()) %>%
  unlist())
)
#rm(list = ls())
```

#### Upper-Middle

```{r}
# Import Data
dat <- read_rds("data/dataset_filled.rds") %>%
  mutate(ratio_rural_urban = pop_rural / pop_urban) %>%
  # Let's take the reciprocal transformation of HDI 
  # (which basically is inverse HDI )
  # the higher inverse HDI, the lower real HDI
  mutate(hdi = 1/ hdi) %>%
  mutate(government_effectiveness = scales::rescale(government_effectiveness, to = c(1,10)) %>% log() ) %>%
  mutate(gdp_capita = gdp_capita ^2) %>%
  # Break association between hydro and fossil fuels, by cubing hydro
  mutate(hydro = hydro^3) %>%
  mutate(policy_nonfit = policy_any - policy_fit) %>%
  # Turn all tallies of GWh of energy consumption into per capita measures
  mutate_at(vars(contains("_iea")), funs(. / pop * 100000)) %>%
  # Now transform population to avoid colinearity
  mutate(pop = log(pop)) %>%
  # rescale for easier interpretations
  mutate_at(vars(renewables, biomass, hydro, nuclear,
                 solar_iea, wind_iea, tidal_iea, geothermal_iea, hydro_iea,
                 policy_any, policy_fit, policy_nonfit,
                 policy_treasure, policy_organization,
                 policy_nodality, policy_authority,
                 government_effectiveness, polity2, gdp_capita,
                 unemployment, hdi,# emissions_ghg, 
                 oil_rents, fossil, pop, ratio_rural_urban, age),
            funs(scale(.) %>% as.numeric())) %>%
  filter(income_level == "Upper middle income") %>%
  mutate(year = factor(year),
         income_levels = factor(income_level),
         region = factor(region),
         country = factor(country)) 


# Let's run our simplest models as both fixed and random effects,
# and check the hausman test statistic
m1c <- dat %>%
  plm::plm(formula = equity ~ renewables + hydro + nuclear + biomass + 
             policy_any +  government_effectiveness + polity2 +
             gdp_capita + unemployment + hdi + oil_rents + fossil + 
             pop + ratio_rural_urban + age, 
           model = "within", effect = "time", index = c("country", "year"))
m1d <- dat %>%
  plm::plm(formula = equity ~ renewables + hydro + nuclear + biomass + 
             policy_any +  government_effectiveness + polity2 +
             gdp_capita + unemployment + hdi + oil_rents + fossil + 
             pop + ratio_rural_urban + age, 
           model = "random", effect = "time", index = c("country", "year"))
# Looks like the Hausman test is statistically significant,
# meaning that fixed effects are a better fit here.
# I think conceptually, they area better fit here too,
# since we're trying to directly account for variation over time
phtest(m1c, m1d)

remove(m1c,m1d)

m1a <- dat %>%
  lm(formula = equity ~ renewables + hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
#(m1a %>% car::vif())^2
m2a <- dat %>%
  lm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
m3a <- dat %>%
  lm(formula = equity ~ renewables +
             hydro + nuclear + biomass +
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
m4a <- dat %>%
  lm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro + nuclear + biomass + 
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
# Pretty solid VIF!
(m4a %>% car::vif())^2

# Let's make a table
texreg::htmlreg(
  list(m1a,m2a,m3a,m4a),
  file = "table/table_3A.html",
  caption.above = TRUE,
   caption = "<b>OLS Panel Models of Social Equity Scores for Upper-Medium Income Countries</b><br><i>for each of 29 Countries over 26 Years, with Annual Fixed Effects",
  custom.header = list("Any RE Promotion Policy" = 1:2, 
                       "RE Promotion Policies by Type" = 3:4),
  custom.model.names = c("RE Overall",
                         "RE by Type",
                         "RE Overall",
                         "RE by Type"),
  custom.note = "*** p < 0.001, ** p < 0.01, * p < 0.05, . p < 10. Each model includes fixed effects by year. Statistically significant Hausman tests found that fixed effects fit better than random effects. All coefficients represent the change in equity scores given a 1 standard deviation increase in the predictor.",
  stars = c(0.001, 0.01, 0.05, 0.10),
  bold = 0.10,
  single.row = TRUE,
  custom.coef.map = list(
    # Technologies
    "renewables" = "Renewable Energy",
    "solar_iea" = "Solar PV",
    "wind_iea" = "Wind",
    "geothermal_iea" = "Geothermal",
    "tidal_iea" = "Tidal",
    "biomass" = "Biomass",
    "hydro" = "Hydro (cubed)",
    "nuclear" = "Nuclear",
        # Resource Curse
        "oil_rents" = "Oil Rents (% of GDP)",
    "fossil" = "Fossil Fuels",
    # Policies
    "policy_any"= "Policies: Any",
    "policy_fit" = "Policies: Feed-in Tariff",
    "policy_nonfit" = "Policies: Non-Feed-in Tariff",
    # Governance
    "government_effectiveness" = "Government Effectiveness (log)",
    "polity2" = "Strength of Democracy",
    # Development
    "gdp_capita" = "GDP per capita (squared)",
    "unemployment" = "Unemployment Rate",
    "hdi" = "Human Development Index (reciprocal)",
    "age" = "Median Age Bracket (5 years)",
    "ratio_rural_urban" = "Ratio of Rural to Urban Residents",
    "pop" = "Population (log)",
    "income_levelUpper middle income" = "Income Level: Upper-Middle",
    "income_levelLower middle income" = "Income Level: Lower-Middle",
    "income_levelLow income" = "Income Level: Low",
    "regionSouth Asia" = "South Asia",
    "regionSub-Saharan Africa" = "Sub-Saharan Africa",
    "regionMiddle East & North Africa" = "Middle East & North Africa",              
    "regionEurope & Central Asia" = "Europe & Central Asia",
    "regionLatin America & Caribbean" = "Latin America & Caribbean", 
    "regionNorth America" = "North America", 
    "(Intercept)" = "Constant"),
  groups = list(
    "<b>Path Dependence</b>" = 1:8,
    "<b>Curse of Development</b>" = 9:10,
    "<b>Policy Toolkits</b>" = 11:13,
    "<b>Quality of Governance</b>" = 14:15,
    "<b>Development</b>" = 16:21),
#    "<b>Income Groups</b>" = 22:24),
#    "<b>Geography</b>" = 25:30),
  include.fstat = TRUE,
  custom.gof.rows = list(
    "Mean VIF" =   list(m1a,m2a,m3a,m4a) %>%
  map(~car::vif(.)[,3]^2 %>% mean()) %>%
  unlist())
)
#rm(list = ls())
```

#### Lower-Middle

```{r}
# Import Data
dat <- read_rds("data/dataset_filled.rds") %>%
  mutate(ratio_rural_urban = pop_rural / pop_urban) %>%
  # Let's take the reciprocal transformation of HDI 
  # (which basically is inverse HDI )
  # the higher inverse HDI, the lower real HDI
  mutate(hdi = 1/ hdi) %>%
  mutate(government_effectiveness = scales::rescale(government_effectiveness, to = c(1,10)) %>% log() ) %>%
  mutate(gdp_capita = gdp_capita ^2) %>%
  # Break association between hydro and fossil fuels, by cubing hydro
  mutate(hydro = hydro^3) %>%
  mutate(policy_nonfit = policy_any - policy_fit) %>%
  # Turn all tallies of GWh of energy consumption into per capita measures
  mutate_at(vars(contains("_iea")), funs(. / pop * 100000)) %>%
  # Now transform population to avoid colinearity
  mutate(pop = log(pop)) %>%
  # rescale for easier interpretations
  mutate_at(vars(renewables, biomass, hydro, nuclear,
                 solar_iea, wind_iea, tidal_iea, geothermal_iea, hydro_iea,
                 policy_any, policy_fit, policy_nonfit,
                 policy_treasure, policy_organization,
                 policy_nodality, policy_authority,
                 government_effectiveness, polity2, gdp_capita,
                 unemployment, hdi,# emissions_ghg, 
                 oil_rents, fossil, pop, ratio_rural_urban, age),
            funs(scale(.) %>% as.numeric())) %>%
  filter(income_level == "Lower middle income") %>%
  mutate(year = factor(year),
         income_levels = factor(income_level),
         region = factor(region),
         country = factor(country)) 


# Let's run our simplest models as both fixed and random effects,
# and check the hausman test statistic
m1c <- dat %>%
  plm::plm(formula = equity ~ renewables + hydro + nuclear + biomass + 
             policy_any +  government_effectiveness + polity2 +
             gdp_capita + unemployment + hdi + oil_rents + fossil + 
             pop + ratio_rural_urban + age, 
           model = "within", effect = "time", index = c("country", "year"))
m1d <- dat %>%
  plm::plm(formula = equity ~ renewables + hydro + nuclear + biomass + 
             policy_any +  government_effectiveness + polity2 +
             gdp_capita + unemployment + hdi + oil_rents + fossil + 
             pop + ratio_rural_urban + age, 
           model = "random", effect = "time", index = c("country", "year"))
# Looks like the Hausman test is statistically significant,
# meaning that fixed effects are a better fit here.
# I think conceptually, they area better fit here too,
# since we're trying to directly account for variation over time
phtest(m1c, m1d)

remove(m1c,m1d)

m2c <- dat %>%
  plm::plm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + #tidal_iea + 
             hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age,
     model = "within", effect = "time", index = c("country", "year"))
m2d <- dat %>%
  plm::plm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + #tidal_iea + 
             hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age,
     model = "random", effect = "time", index = c("country", "year"))

phtest(m2c,m2d)
remove(m2c,m2d)
# So in this case, random effects fits better, HOWEVER,
# the random effects have very little variance - rounded to 0.
# That's pretty weird, and usually indicates model singularity.
# In fact, plm doesn't have that warning, but when we run it as a lme4 model,
# it warns the user that we shouldn't use that model due to singularity.
# Instead, we use fixed effects for these cases as well.

m1a <- dat %>%
  lm(formula = equity ~ renewables + hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
#(m1a %>% car::vif())^2
m2a <- dat %>%
  lm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + #tidal_iea + 
             hydro + nuclear + biomass + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))

m3a <- dat %>%
  lm(formula = equity ~ renewables +
             hydro + nuclear + biomass +
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
m4a <- dat %>%
  lm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + #tidal_iea + 
             hydro + nuclear + biomass + 
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
# Pretty solid VIF!
(m4a %>% car::vif())^2

# Let's make a table
texreg::htmlreg(
  list(m1a,m2a,m3a,m4a),
  file = "table/table_4A.html",
  caption.above = TRUE,
  caption = "<b>OLS Panel Models of Social Equity Scores for Lower-Medium Income Countries</b><br><i>for each of 26 Countries over 26 Years, with Annual Fixed Effects",
  custom.header = list("Any RE Promotion Policy" = 1:2, 
                       "RE Promotion Policies by Type" = 3:4),
  custom.model.names = c("RE Overall",
                         "RE by Type",
                         "RE Overall",
                         "RE by Type"),
  custom.note = "*** p < 0.001, ** p < 0.01, * p < 0.05, . p < 10. Each model includes fixed effects by year. In these models, random effects had uncomfortably low levels of variance among random effects, typically a sign of model singularity and poor fit. As a result, we used fixed effects instead. All coefficients represent the change in equity scores given a 1 standard deviation increase in the predictor.",
  stars = c(0.001, 0.01, 0.05, 0.10),
  bold = 0.10,
  single.row = TRUE,
  custom.coef.map = list(
    # Technologies
    "renewables" = "Renewable Energy",
    "solar_iea" = "Solar PV",
    "wind_iea" = "Wind",
    "geothermal_iea" = "Geothermal",
    "tidal_iea" = "Tidal",
    "biomass" = "Biomass",
    "hydro" = "Hydro (cubed)",
    "nuclear" = "Nuclear",
        # Resource Curse
        "oil_rents" = "Oil Rents (% of GDP)",
    "fossil" = "Fossil Fuels",
    # Policies
    "policy_any"= "Policies: Any",
    "policy_fit" = "Policies: Feed-in Tariff",
    "policy_nonfit" = "Policies: Non-Feed-in Tariff",
    # Governance
    "government_effectiveness" = "Government Effectiveness (log)",
    "polity2" = "Strength of Democracy",
    # Development
    "gdp_capita" = "GDP per capita (squared)",
    "unemployment" = "Unemployment Rate",
    "hdi" = "Human Development Index (reciprocal)",
    "age" = "Median Age Bracket (5 years)",
    "ratio_rural_urban" = "Ratio of Rural to Urban Residents",
    "pop" = "Population (log)",
    "income_levelUpper middle income" = "Income Level: Upper-Middle",
    "income_levelLower middle income" = "Income Level: Lower-Middle",
    "income_levelLow income" = "Income Level: Low",
    "regionSouth Asia" = "South Asia",
    "regionSub-Saharan Africa" = "Sub-Saharan Africa",
    "regionMiddle East & North Africa" = "Middle East & North Africa",              
    "regionEurope & Central Asia" = "Europe & Central Asia",
    "regionLatin America & Caribbean" = "Latin America & Caribbean", 
    "regionNorth America" = "North America", 
    "(Intercept)" = "Constant"),
  groups = list(
    "<b>Path Dependence</b>" = 1:7,
    "<b>Curse of Development</b>" = 8:9,
    "<b>Policy Toolkits</b>" = 10:12,
    "<b>Quality of Governance</b>" = 13:14,
    "<b>Development</b>" = 15:20),
#    "<b>Income Groups</b>" = 22:24),
#    "<b>Geography</b>" = 25:30),
  include.fstat = TRUE,
  custom.gof.rows = list(
    "Mean VIF" =   list(m1a,m2a,m3a,m4a) %>%
  map(~car::vif(.)[,3]^2 %>% mean()) %>%
  unlist())
)

# In these models, our random effects do worse, but our fixed effects models appear fine.
# We had to remove tidal_iea due to too few valid observations
```


#### Low

I tried to model low income countries, but there are only 4 of them, 
so this is a very suspect way to do things. Instead, we'll focus just on the other categories which have ample observations.

```{r}
# Import Data
dat <- read_rds("data/dataset_filled.rds") %>%
  mutate(ratio_rural_urban = pop_rural / pop_urban) %>%
  # Let's take the reciprocal transformation of HDI 
  # (which basically is inverse HDI )
  # the higher inverse HDI, the lower real HDI
  mutate(hdi = 1/ hdi) %>%
  mutate(government_effectiveness = scales::rescale(government_effectiveness, to = c(1,10)) %>% log() ) %>%
  mutate(gdp_capita = gdp_capita ^2) %>%
  # Break association between hydro and fossil fuels, by cubing hydro
  mutate(hydro = hydro^3) %>%
  mutate(policy_nonfit = policy_any - policy_fit) %>%
  # Turn all tallies of GWh of energy consumption into per capita measures
  mutate_at(vars(contains("_iea")), funs(. / pop * 100000)) %>%
  # Now transform population to avoid colinearity
  mutate(pop = log(pop)) %>%
  # rescale for easier interpretations
  mutate_at(vars(renewables, biomass, hydro, nuclear,
                 solar_iea, wind_iea, tidal_iea, geothermal_iea, hydro_iea,
                 policy_any, policy_fit, policy_nonfit,
                 policy_treasure, policy_organization,
                 policy_nodality, policy_authority,
                 government_effectiveness, polity2, gdp_capita,
                 unemployment, hdi,# emissions_ghg, 
                 oil_rents, fossil, pop, ratio_rural_urban, age),
            funs(scale(.) %>% as.numeric())) %>%
  filter(income_level == "Low income") %>%
  mutate(year = factor(year),
         income_levels = factor(income_level),
         region = factor(region),
         country = factor(country)) 

m1c <- dat %>%
  plm::plm(formula = equity ~ renewables + hydro +  biomass + #nuclear +
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age,
       model = "within", effect = "time", index = c("country", "year"))
m1d <- dat %>%
  plm::plm(formula = equity ~ renewables + hydro +  biomass + #nuclear +
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age,
       model = "random", effect = "time", index = c("country", "year"))
phtest(m1c,m1d)
# Fixed fits better.
remove(m1c,m1d)


m2c <- dat %>%
  plm::plm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + #tidal_iea + 
             hydro + biomass + #nuclear + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age,
       model = "within", effect = "time", index = c("country", "year"))
m2d <- dat %>%
  plm::plm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + #tidal_iea + 
             hydro + biomass + #nuclear + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age,
       model = "random", effect = "time", index = c("country", "year"))
phtest(m2c,m2d)
# Fixed fitsbetter
remove(m2c,m2d)
m1a <- dat %>%
  lm(formula = equity ~ renewables + hydro +  biomass + #nuclear +
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
#(m1a %>% car::vif())^2

m2a <- dat %>%
  lm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + #tidal_iea + 
             hydro + biomass + #nuclear + 
       policy_any +  government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
m3a <- dat %>%
  lm(formula = equity ~ renewables +
             hydro + biomass +#nuclear + 
             policy_nonfit + #policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
m4a <- dat %>%
  lm(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + #tidal_iea + 
             hydro +  biomass +#nuclear +  
             policy_nonfit + #policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year))
# Pretty solid VIF!
#(m4a %>% car::vif())^2

# Let's make a table
texreg::htmlreg(
  list(m1a,m2a,m3a,m4a),
  file = "table/table_5A.html",
  caption.above = TRUE,
  caption = "<b>OLS Panel Models of Social Equity Scores for Low Income Countries</b><br><i>for each of 4 Countries over 26 Years, with Annual Fixed Effects",
  custom.header = list("Any RE Promotion Policy" = 1:2, 
                       "RE Promotion Policies by Type" = 3:4),
  custom.model.names = c("RE Overall",
                         "RE by Type",
                         "RE Overall",
                         "RE by Type"),
  custom.note = "*** p < 0.001, ** p < 0.01, * p < 0.05, . p < 10. Each model includes fixed effects by year. According to a statistically significant Hausman test statistic, the fixed effects fit better than random effects. As a result, we present the coefficients from the fixed effects models aboves. All coefficients represent the change in equity scores given a 1 standard deviation increase in the predictor.",
  stars = c(0.001, 0.01, 0.05, 0.10),
  bold = 0.10,
  single.row = TRUE,
  custom.coef.map = list(
    # Technologies
    "renewables" = "Renewable Energy",
    "solar_iea" = "Solar PV",
    "wind_iea" = "Wind",
    "geothermal_iea" = "Geothermal",
    "tidal_iea" = "Tidal",
    "biomass" = "Biomass",
    "hydro" = "Hydro (cubed)",
    "nuclear" = "Nuclear",
        # Resource Curse
        "oil_rents" = "Oil Rents (% of GDP)",
    "fossil" = "Fossil Fuels",
    # Policies
    "policy_any"= "Policies: Any",
    "policy_fit" = "Policies: Feed-in Tariff",
    "policy_nonfit" = "Policies: Non-Feed-in Tariff",
    # Governance
    "government_effectiveness" = "Government Effectiveness (log)",
    "polity2" = "Strength of Democracy",
    # Development
    "gdp_capita" = "GDP per capita (squared)",
    "unemployment" = "Unemployment Rate",
    "hdi" = "Human Development Index (reciprocal)",
    "age" = "Median Age Bracket (5 years)",
    "ratio_rural_urban" = "Ratio of Rural to Urban Residents",
    "pop" = "Population (log)",
    "income_levelUpper middle income" = "Income Level: Upper-Middle",
    "income_levelLower middle income" = "Income Level: Lower-Middle",
    "income_levelLow income" = "Income Level: Low",
    "regionSouth Asia" = "South Asia",
    "regionSub-Saharan Africa" = "Sub-Saharan Africa",
    "regionMiddle East & North Africa" = "Middle East & North Africa",              
    "regionEurope & Central Asia" = "Europe & Central Asia",
    "regionLatin America & Caribbean" = "Latin America & Caribbean", 
    "regionNorth America" = "North America", 
    "(Intercept)" = "Constant"),
  groups = list(
    "<b>Path Dependence</b>" = 1:6,
    "<b>Curse of Development</b>" = 7:8,
    "<b>Policy Toolkits</b>" = 9:11,
    "<b>Quality of Governance</b>" = 12:13,
    "<b>Development</b>" = 14:19),
#    "<b>Income Groups</b>" = 22:24),
#    "<b>Geography</b>" = 25:30),
  include.fstat = TRUE,
  custom.gof.rows = list(
    "Mean VIF" =   list(m1a,m2a,m3a,m4a) %>%
  map(~car::vif(.)[,3]^2 %>% mean()) %>%
  unlist())
)


# In these models, our random effects do worse, but our fixed effects models appear fine.
# We had to remove tidal_iea due to too few valid observations, as well as policy_fit
```

## 5. Visualization by Income Group

Let's also create several helpful visuals to interpret our income-group models.

#### Feed-in Tariffs

```{r}
library(tidyverse)
library(plm)
library(ggeffects)
library(Zelig)

# IBM Colorblind friendly scale
# https://davidmathlogic.com/colorblind/#%23648FFF-%23785EF0-%23DC267F-%23FE6100-%23FFB000
mycolors <- c("#648FFF", "#785EF0", "#DC267F", "#FE6100", "#FFB000")

# Import Data
dat <- read_rds("data/dataset_filled.rds") %>%
  mutate(ratio_rural_urban = pop_rural / pop_urban) %>%
  # Let's take the reciprocal transformation of HDI 
  # (which basically is inverse HDI )
  # the higher inverse HDI, the lower real HDI
  mutate(hdi = 1/ hdi) %>%
  mutate(government_effectiveness = scales::rescale(government_effectiveness, to = c(1,10)) %>% log() ) %>%
  mutate(gdp_capita = gdp_capita ^2) %>%
  # Break association between hydro and fossil fuels, by cubing hydro
  mutate(hydro = hydro^3) %>%
  mutate(year = factor(year),
         income_levels = factor(income_level),
         region = factor(region),
         country = factor(country)) %>%
  mutate(policy_nonfit = policy_any - policy_fit) %>%
  # Turn all tallies of GWh of energy consumption into per capita measures
  mutate_at(vars(contains("_iea")), funs(. / pop * 100000)) %>%
    # Now transform population to avoid colinearity
  mutate(pop = log(pop)) %>%
  # rescale for easier interpretations
  mutate_at(vars(renewables, biomass, hydro, nuclear,
                 solar_iea, wind_iea, tidal_iea, geothermal_iea, hydro_iea,
                 policy_any, policy_fit, policy_nonfit,
                 policy_treasure, policy_organization,
                 policy_nodality, policy_authority,
                 government_effectiveness, polity2, gdp_capita,
                 unemployment, hdi,# emissions_ghg, 
                 oil_rents,fossil, pop, ratio_rural_urban, age),
            funs(scale(.)))

# Make a fixed effects model (since the results are nearly identical)
all <- dat %>%
  as.data.frame() %>%
  zelig(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro +  biomass + nuclear +  
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year), model = "ls")
hi <- dat %>%
  filter(income_level == "High income") %>%
  as.data.frame() %>%
  zelig(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro +  biomass + nuclear +  
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year), model = "ls")
um <- dat %>%
  filter(income_level == "Upper middle income") %>%
  as.data.frame() %>%
  zelig(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro +  biomass + nuclear +  
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year), model = "ls")
lm <- dat %>%
  filter(income_level == "Lower middle income") %>%
  as.data.frame() %>%
  zelig(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + #tidal_iea + 
             hydro +  biomass + nuclear +  
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year), model = "ls")

out <- bind_rows(

  bind_rows(
  all %>%
    setx(policy_fit = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(policy = policy_fit, expected_value) %>%
    mutate(type = "\nAll Countries\n(n = 99)\n"),
  hi %>%
    setx(policy_fit = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(policy = policy_fit, expected_value) %>%
    mutate(type = "High Income\nCountries\n(n = 40)\n"),
  um %>%
    setx(policy_fit = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(policy = policy_fit, expected_value) %>%
    mutate(type = "Upper-Middle\nIncome Countries\n(n = 29)\n"),
  lm %>%
    setx(policy_fit = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(policy = policy_fit, expected_value) %>%
    mutate(type = "Lower-Middle\nIncome Countries\n(n = 26)\n")
) %>%
  expand_grid(., bands = c(.999, .99, .95)) %>% 
  group_by(bands, policy, type) %>%
  summarize(median = median(expected_value),
            lower = quantile(expected_value, probs = (1 - bands[1]) / 2),
            higher = quantile(expected_value, probs = (1 - (1 - bands[1]) / 2))) %>%
  ungroup() %>%
  mutate(bands = (bands * 100) %>% paste(., "%", sep = "")) %>%
  mutate(class = "Feed-in Tariff Policy"),

bind_rows(
  all %>%
    setx(policy_nonfit = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(policy = policy_nonfit, expected_value) %>%
    mutate(type = "\nAll Countries\n(n = 99)\n"),
  hi %>%
    setx(policy_nonfit = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(policy = policy_nonfit, expected_value) %>%
    mutate(type = "High Income\nCountries\n(n = 40)\n"),
  um %>%
    setx(policy_nonfit = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(policy = policy_nonfit, expected_value) %>%
    mutate(type = "Upper-Middle\nIncome Countries\n(n = 29)\n"),
  lm %>%
    setx(policy_nonfit = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(policy = policy_nonfit, expected_value) %>%
    mutate(type = "Lower-Middle\nIncome Countries\n(n = 26)\n")
) %>%
  expand_grid(., bands = c(.999, .99, .95)) %>% 
  group_by(bands, policy, type) %>%
  summarize(median = median(expected_value),
            lower = quantile(expected_value, probs = (1 - bands[1]) / 2),
            higher = quantile(expected_value, probs = (1 - (1 - bands[1]) / 2))) %>%
  ungroup() %>%
  mutate(bands = (bands * 100) %>% paste(., "%", sep = "")) %>%
  mutate(class = "Other RE Promotion Policy")
)

out %>%
  filter(bands == "95%") %>%
  mutate(type = factor(type, levels = c(
    "\nAll Countries\n(n = 99)\n",
    "High Income\nCountries\n(n = 40)\n",
    "Upper-Middle\nIncome Countries\n(n = 29)\n",
    "Lower-Middle\nIncome Countries\n(n = 26)\n"))) %>%
  ggplot(mapping = aes(x = policy, y = median, 
                       ymin = lower, ymax = higher, fill = type, color = type)) +
  geom_line() +
  geom_ribbon(alpha = 0.5) +
  facet_wrap(~class, ncol = 2) +
  scale_fill_manual(values = mycolors[c(2,3,4,5)]) +
    scale_color_manual(values = mycolors[c(2,3,4,5)]) +
  guides(color = FALSE) +
  labs(x = "# of Policies Adopted (Z-Score)", y = "Expected Social Equity Index Score",
       fill = "World Bank\nIncome Category") +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm")) +
  ggsave("viz/figure_4.png", width = 7, height =4, dpi = 500)
```


#### HDI

```{r}
library(tidyverse)
library(plm)
library(ggeffects)
library(Zelig)

# IBM Colorblind friendly scale
# https://davidmathlogic.com/colorblind/#%23648FFF-%23785EF0-%23DC267F-%23FE6100-%23FFB000
mycolors <- c("#648FFF", "#785EF0", "#DC267F", "#FE6100", "#FFB000")

# Import Data
dat <- read_rds("data/dataset_filled.rds") %>%
  mutate(ratio_rural_urban = pop_rural / pop_urban) %>%
  # Let's take the reciprocal transformation of HDI 
  # (which basically is inverse HDI )
  # the higher inverse HDI, the lower real HDI
  mutate(hdi = 1/ hdi) %>%
  mutate(government_effectiveness = scales::rescale(government_effectiveness, to = c(1,10)) %>% log() ) %>%
  mutate(gdp_capita = gdp_capita ^2) %>%
  # Break association between hydro and fossil fuels, by cubing hydro
  mutate(hydro = hydro^3) %>%
  mutate(year = factor(year),
         income_levels = factor(income_level),
         region = factor(region),
         country = factor(country)) %>%
  mutate(policy_nonfit = policy_any - policy_fit) %>%
  # Turn all tallies of GWh of energy consumption into per capita measures
  mutate_at(vars(contains("_iea")), funs(. / pop * 100000)) %>%
    # Now transform population to avoid colinearity
  mutate(pop = log(pop)) %>%
  # rescale for easier interpretations
  mutate_at(vars(renewables, biomass, hydro, nuclear,
                 solar_iea, wind_iea, tidal_iea, geothermal_iea, hydro_iea,
                 policy_any, policy_fit, policy_nonfit,
                 policy_treasure, policy_organization,
                 policy_nodality, policy_authority,
                 government_effectiveness, polity2, gdp_capita,
                 unemployment, hdi,# emissions_ghg, 
                 oil_rents,fossil, pop, ratio_rural_urban, age),
            funs(scale(.)))

# Make a fixed effects model (since the results are nearly identical)
all <- dat %>%
  as.data.frame() %>%
  zelig(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro +  biomass + nuclear +  
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year), model = "ls")
hi <- dat %>%
  filter(income_level == "High income") %>%
  as.data.frame() %>%
  zelig(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro +  biomass + nuclear +  
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year), model = "ls")
um <- dat %>%
  filter(income_level == "Upper middle income") %>%
  as.data.frame() %>%
  zelig(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + tidal_iea + 
             hydro +  biomass + nuclear +  
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year), model = "ls")
lm <- dat %>%
  filter(income_level == "Lower middle income") %>%
  as.data.frame() %>%
  zelig(formula = equity ~ solar_iea + wind_iea + 
             geothermal_iea + #tidal_iea + 
             hydro +  biomass + nuclear +  
             policy_nonfit + policy_fit +
         government_effectiveness + polity2 +
       gdp_capita + unemployment + hdi + oil_rents + fossil + 
       pop + ratio_rural_urban + age + factor(year), model = "ls")

out <- bind_rows(

  bind_rows(
  all %>%
    setx(hdi = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(hdi, expected_value) %>%
    mutate(type = "\nAll Countries\n(n = 99)\n"),
  hi %>%
    setx(hdi = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(hdi, expected_value) %>%
    mutate(type = "High Income\nCountries\n(n = 40)\n"),
  um %>%
    setx(hdi = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(hdi, expected_value) %>%
    mutate(type = "Upper-Middle\nIncome Countries\n(n = 29)\n"),
  lm %>%
    setx(hdi = seq(-2, 2, length.out = 10)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    select(hdi, expected_value) %>%
    mutate(type = "Lower-Middle\nIncome Countries\n(n = 26)\n")
) %>%
  expand_grid(., bands = c(.999, .99, .95)) %>% 
  group_by(bands, hdi, type) %>%
  summarize(median = median(expected_value),
            lower = quantile(expected_value, probs = (1 - bands[1]) / 2),
            higher = quantile(expected_value, probs = (1 - (1 - bands[1]) / 2))) %>%
  ungroup() %>%
  mutate(bands = (bands * 100) %>% paste(., "%", sep = ""))) 


# Let's write a quick function to backtransform from HDI
backtransform = function(myvector){
  # First let's calculate the reciprocal transformation of hdi
  mydata <- read_rds("data/dataset_filled.rds") %>%
    select(hdi) %>%
    mutate(hdi = 1/hdi)
  # and get the mean of the reciprocal transformation
  mu <- mean(mydata$hdi)
  # and the standard deviation
  sigma <- sd(mydata$hdi)
  
  # Now multiply your Z-score by the standard deviation and add back the mean
  unscaled <- (myvector * sigma) + mu
  
  # Now divide the reciprocal by itself again to get the original
  return(1 / unscaled)
}

# Generate raw data
raw <- bind_rows(
  dat %>%
    select(hdi, equity, income_level) %>%
    mutate(type = "\nAll Countries\n(n = 99)\n"),
  dat %>%
    select(hdi, equity, income_level) %>%
    filter(income_level != "Low income") %>%
    mutate(type = income_level %>% dplyr::recode(
      "High income" = "High Income\nCountries\n(n = 40)\n",
      "Upper middle income" = "Upper-Middle\nIncome Countries\n(n = 29)\n",
      "Lower middle income" = "Lower-Middle\nIncome Countries\n(n = 26)\n"))) %>%
   mutate(type = factor(type, levels = c(
    "\nAll Countries\n(n = 99)\n",
    "High Income\nCountries\n(n = 40)\n",
    "Upper-Middle\nIncome Countries\n(n = 29)\n",
    "Lower-Middle\nIncome Countries\n(n = 26)\n"))) %>%
 mutate(hdi = backtransform(hdi))



# Generate visual
out %>%
  filter(bands == "95%") %>%
  mutate(type = factor(type, levels = c(
    "\nAll Countries\n(n = 99)\n",
    "High Income\nCountries\n(n = 40)\n",
    "Upper-Middle\nIncome Countries\n(n = 29)\n",
    "Lower-Middle\nIncome Countries\n(n = 26)\n"))) %>%
  mutate(hdi = backtransform(hdi)) %>%
  ggplot(mapping = aes(x = hdi, y = median, 
                       ymin = lower, ymax = higher, fill = type, color = type)) +
  geom_line() +
  geom_ribbon(alpha = 0.5) +
  scale_fill_manual(values = mycolors[c(2,3,4,5)]) +
  scale_color_manual(values = mycolors[c(2,3,4,5)]) +
 # facet_wrap(~type) +
  guides(color = FALSE) +
  labs(x = "Human Development Index Score", y = "Expected Social Equity Index Score",
       fill = "World Bank\nIncome Category") +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm")) +
  ggsave("viz/figure_6.png", width = 7, height =4, dpi = 500)

g2 <- raw %>%
  ggplot(mapping = aes(x = hdi, y = equity, ymin = NA_real_, ymax = NA_real_,
                                        color = type)) +
  geom_jitter(alpha = 0.1) +
  geom_smooth(method = "lm") +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm")) +
           labs(x = "Human Development Index Score", y = "Expected Social Equity Index Score",
       color = "World Bank\nIncome Category") 
```


### Policies

```{r}
# IBM Colorblind friendly scale
# https://davidmathlogic.com/colorblind/#%23648FFF-%23785EF0-%23DC267F-%23FE6100-%23FFB000
mycolors <- c("#648FFF", "#785EF0", "#DC267F", "#FE6100", "#FFB000")

# Import policy data
dat <- read_rds("data/dataset_filled.rds") %>%
  select(code, country, year, income_level, equity, policy_any, policy_fit, renewables) %>%
  mutate(policy_nonfit = policy_any - policy_fit) %>%
#  pivot_longer(cols = contains("policy"), names_to = "type", values_to = "value") %>%
  mutate(equity_cat = ntile(equity, 4) %>% recode_factor(
    "4" = "High\nSocial Equity\n",
    "3" = "Upper-Middle\nSocial Equity\n",
    "2" = "Lower-Middle\nSocial Equity\n",
    "1" = "Low\nSocial Equity\n")) %>%
  mutate(policy = case_when(
    policy_fit > 0 & policy_nonfit > 0 ~ "Both",
    policy_fit > 0 & policy_nonfit == 0 ~ "Feed-in Tariff",
    policy_fit == 0 & policy_nonfit > 0 ~"Other RE Policy",
    policy_fit == 0 & policy_nonfit == 0 ~ "None") %>%
      factor(levels = c("Both", "Feed-in Tariff", "Other RE Policy", "None")))

# I want to know each year,
# What share of countries got above median equity?

dat %>%
  group_by(equity_cat, policy) %>%
  summarize(count = n()) %>%
  ungroup() %>%
  group_by(policy) %>%
  mutate(policy = paste(policy, "\n(n = ", sum(count), ")\n", sep = "") %>%
           factor(levels = c("Both\n(n = 529)\n",
                             "Feed-in Tariff\n(n = 104)\n",
                             "Other RE Policy\n(n = 722)\n",
                             "None\n(n = 1219)\n")))  %>%
  ggplot(mapping = aes(x = equity_cat, y = count, fill = policy)) +
  geom_col(position = "fill", size = 1.25, color = "white") +
  coord_flip()  +
  scale_y_continuous(breaks = c(0,.25,.5, .75, 1), labels = c(0, 25, 50, 75, 100)) +
  scale_fill_manual(values = rev(mycolors[c(2,3,4,5)])) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black")) +
  labs(x = "Social Equity Index Score",
       y = "% of Country-Years (n = 2574)",
       fill = "Renewable Energy\nPolicy Adoption")  +
  ggsave("viz/percentage_policies.png", width = 6, height = 3, dpi = 500)

dat %>%
  group_by(equity_cat, policy, year) %>%
  summarize(count = n()) %>%
  ungroup()  %>%
    group_by(policy) %>%
  mutate(policy = paste(policy, "\n(n = ", sum(count), ")\n", sep = "") %>%
           factor(levels = c("Both\n(n = 529)\n",
                             "Feed-in Tariff\n(n = 104)\n",
                             "Other RE Policy\n(n = 722)\n",
                             "None\n(n = 1219)\n")))  %>%
  ggplot(mapping = aes(x = year, y = count, fill = policy)) +
  geom_col(position = "fill", color = "white", size=  0.20) +
  facet_wrap(~equity_cat,ncol = 4) +
    scale_y_continuous(breaks = c(0,.25,.5, .75, 1), labels = c(0, 25, 50, 75, 100)) +
  scale_fill_manual(values = rev(mycolors[c(2,3,4,5)])) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm")) +
  labs(x = "Year of Adoption (n = 26)",
       y = "% of Country-Years (n = 2574)",
       fill = "Renewable Energy\nPolicies in Effect")   +
 ggsave("viz/figure_3.png", width = 8, height = 3, dpi = 500)
```


```{r}

# IBM Colorblind friendly scale
# https://davidmathlogic.com/colorblind/#%23648FFF-%23785EF0-%23DC267F-%23FE6100-%23FFB000
mycolors <- c("#648FFF", "#785EF0", "#DC267F", "#FE6100", "#FFB000")

# Import policy data
dat <- read_rds("data/dataset_filled.rds") %>%
  select(code, country, year, income_level, equity, 
         renewables,
         solar_iea, wind_iea, geothermal_iea, tidal_iea) %>%
  pivot_longer(cols = c(renewables, solar_iea, wind_iea, geothermal_iea, tidal_iea),
               names_to = "type", values_to = "value") %>%
  mutate(equity_cat = ntile(equity, 4) %>% recode_factor(
    "4" = "High",
    "3" = "Upper-Middle",
    "2" = "Lower-Middle",
    "1" = "Low") %>%
      factor(levels = c("Low", "Lower-Middle", "Upper-Middle", "High"))) %>%
  group_by(type) %>%
  mutate(value = ntile(value, 2) %>% 
           recode_factor("1" = "Below Median",
                         "2" = "Above Median")) %>%
  mutate(type = type %>% recode_factor(
    "renewables" = "Renewable Energy",
    "solar_iea" = "Solar PV",
    "wind_iea" = "Wind",
    "geothermal_iea" = "Geothermal",
    "tidal_iea" = "Tidal") %>%
      factor(levels = c("Renewable Energy", "Solar PV", "Wind", "Geothermal", "Tidal")))

dat %>% 
  group_by(equity_cat, type, value) %>%
  summarize(count = n()) %>%
  ggplot(mapping = aes(x = equity_cat, y = count, fill = value)) +
  geom_col(position = "fill", color = "white", size = 1.25) +
  facet_grid(~type) +
  scale_fill_manual(values = mycolors[c(1,5)]) +
  labs(x = "Social Equity Index", y = "% of Country-Years (n = 2574)", fill = "Renewable Energy Adoption Level") +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm"),
        legend.position = "bottom") +
  coord_flip() +
  scale_y_continuous(breaks = c(0, .25, .5, .75, 1), 
                     labels = c(0, 25, 50, 75, 100))  +
  ggsave("viz/percentage_renewables.png", dpi = 500, width = 9, height = 4)
```

